module Hledger.Read.CsvReader (
reader,
CsvRecord,
nullrules,
rulesFileFor,
parseCsvRulesFile,
transactionFromCsvRecord,
tests_Hledger_Read_CsvReader
)
where
import Control.Exception hiding (try)
import Control.Monad
import Control.Monad.Error
import Data.List
import Data.Maybe
import Data.Ord
import Data.Time.Format (parseTime)
import Safe
import System.Directory (doesFileExist)
import System.FilePath
import System.IO (stderr)
import System.Locale (defaultTimeLocale)
import Test.HUnit
import Text.CSV (parseCSV, CSV)
import Text.ParserCombinators.Parsec hiding (parse)
import Text.ParserCombinators.Parsec.Error
import Text.ParserCombinators.Parsec.Pos
import Text.Printf (hPrintf)
import Hledger.Data
import Prelude hiding (getContents)
import Hledger.Utils.UTF8IOCompat (getContents)
import Hledger.Utils
import Hledger.Data.FormatStrings as FormatStrings
import Hledger.Read.JournalReader (accountname, amount)
reader :: Reader
reader = Reader format detect parse
format :: String
format = "csv"
detect :: FilePath -> String -> Bool
detect f _ = takeExtension f == '.':format
parse :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal
parse rulesfile f s =
do
r <- liftIO $ readJournalFromCsv rulesfile f s
case r of Left e -> throwError e
Right j -> return j
nullrules = CsvRules {
dateField=Nothing,
dateFormat=Nothing,
statusField=Nothing,
codeField=Nothing,
descriptionField=[],
amountField=Nothing,
amountInField=Nothing,
amountOutField=Nothing,
currencyField=Nothing,
baseCurrency=Nothing,
accountField=Nothing,
account2Field=Nothing,
effectiveDateField=Nothing,
baseAccount="unknown",
accountRules=[]
}
type CsvRecord = [String]
readJournalFromCsv :: Maybe FilePath -> FilePath -> String -> IO (Either String Journal)
readJournalFromCsv Nothing "-" _ = return $ Left "please use --rules-file when converting stdin"
readJournalFromCsv mrulesfile csvfile csvdata =
handle (\e -> return $ Left $ show (e :: IOException)) $ do
csvparse <- parseCsv csvfile csvdata
let rs = case csvparse of
Left e -> throw $ userError $ show e
Right rs -> filter (/= [""]) rs
badrecords = take 1 $ filter ((< 2).length) rs
records = case badrecords of
[] -> rs
(_:_) -> throw $ userError $ "Parse error: at least one CSV record has less than two fields:\n"++(show $ head badrecords)
let rulesfile = fromMaybe (rulesFileFor csvfile) mrulesfile
created <- records `seq` ensureRulesFileExists rulesfile
if created
then hPrintf stderr "creating default conversion rules file %s, edit this file for better results\n" rulesfile
else hPrintf stderr "using conversion rules file %s\n" rulesfile
rules <- liftM (either (throw.userError.show) id) $ parseCsvRulesFile rulesfile
let requiredfields = (maxFieldIndex rules + 1)
badrecords = take 1 $ filter ((< requiredfields).length) records
return $ case badrecords of
[] -> Right nulljournal{jtxns=sortBy (comparing tdate) $ map (transactionFromCsvRecord rules) records}
(_:_) -> Left $ "Parse error: at least one CSV record does not contain a field referenced by the conversion rules file:\n"++(show $ head badrecords)
ensureRulesFileExists :: FilePath -> IO Bool
ensureRulesFileExists f = do
exists <- doesFileExist f
if exists
then return False
else do
writeFile f newRulesFileContent
return True
parseCsv :: FilePath -> String -> IO (Either ParseError CSV)
parseCsv path csvdata =
case path of
"-" -> liftM (parseCSV "(stdin)") getContents
_ -> return $ parseCSV path csvdata
maxFieldIndex :: CsvRules -> Int
maxFieldIndex r = maximumDef (1) $ catMaybes [
dateField r
,statusField r
,codeField r
,amountField r
,amountInField r
,amountOutField r
,currencyField r
,accountField r
,account2Field r
,effectiveDateField r
]
rulesFileFor :: FilePath -> FilePath
rulesFileFor = flip replaceExtension ".rules"
newRulesFileContent :: String
newRulesFileContent = let prognameandversion = "hledger" in
"# csv conversion rules file generated by " ++ prognameandversion ++ "\n" ++
"# Add rules to this file for more accurate conversion, see\n"++
"# http://hledger.org/MANUAL.html#convert\n" ++
"\n" ++
"base-account assets:bank:checking\n" ++
"date-field 0\n" ++
"description-field 4\n" ++
"amount-field 1\n" ++
"base-currency $\n" ++
"\n" ++
"# account-assigning rules\n" ++
"\n" ++
"SPECTRUM\n" ++
"expenses:health:gym\n" ++
"\n" ++
"ITUNES\n" ++
"BLKBSTR=BLOCKBUSTER\n" ++
"expenses:entertainment\n" ++
"\n" ++
"(TO|FROM) SAVINGS\n" ++
"assets:bank:savings\n"
parseCsvRulesFile :: FilePath -> IO (Either ParseError CsvRules)
parseCsvRulesFile f = do
s <- readFile f
let rules = parseCsvRules f s
return $ case rules of
Left e -> Left e
Right r -> case validateRules r of
Left e -> Left $ toParseError e
Right r -> Right r
where
toParseError s = newErrorMessage (Message s) (initialPos "")
parseCsvRules :: FilePath -> String -> Either ParseError CsvRules
parseCsvRules rulesfile s = runParser csvrulesfile nullrules{baseAccount=takeBaseName rulesfile} rulesfile s
csvrulesfile :: GenParser Char CsvRules CsvRules
csvrulesfile = do
many blankorcommentline
many definitions
r <- getState
ars <- many accountrule
many blankorcommentline
eof
return r{accountRules=ars}
definitions :: GenParser Char CsvRules ()
definitions = do
choice' [
datefield
,dateformat
,statusfield
,codefield
,descriptionfield
,amountfield
,amountinfield
,amountoutfield
,currencyfield
,accountfield
,account2field
,effectivedatefield
,basecurrency
,baseaccount
,commentline
] <?> "definition"
return ()
datefield = do
string "date-field"
many1 spacenonewline
v <- restofline
updateState (\r -> r{dateField=readMay v})
effectivedatefield = do
string "effective-date-field"
many1 spacenonewline
v <- restofline
updateState (\r -> r{effectiveDateField=readMay v})
dateformat = do
string "date-format"
many1 spacenonewline
v <- restofline
updateState (\r -> r{dateFormat=Just v})
codefield = do
string "code-field"
many1 spacenonewline
v <- restofline
updateState (\r -> r{codeField=readMay v})
statusfield = do
string "status-field"
many1 spacenonewline
v <- restofline
updateState (\r -> r{statusField=readMay v})
descriptionFieldValue :: GenParser Char st [FormatString]
descriptionFieldValue = do
try fieldNo
<|> formatStrings
where
fieldNo = many1 digit >>= \x -> return [FormatField False Nothing Nothing $ FieldNo $ read x]
descriptionfield = do
string "description-field"
many1 spacenonewline
formatS <- descriptionFieldValue
restofline
updateState (\x -> x{descriptionField=formatS})
amountfield = do
string "amount-field"
many1 spacenonewline
v <- restofline
x <- updateState (\r -> r{amountField=readMay v})
return x
amountinfield = do
choice [string "amount-in-field", string "in-field"]
many1 spacenonewline
v <- restofline
updateState (\r -> r{amountInField=readMay v})
amountoutfield = do
choice [string "amount-out-field", string "out-field"]
many1 spacenonewline
v <- restofline
updateState (\r -> r{amountOutField=readMay v})
currencyfield = do
string "currency-field"
many1 spacenonewline
v <- restofline
updateState (\r -> r{currencyField=readMay v})
accountfield = do
string "account-field"
many1 spacenonewline
v <- restofline
updateState (\r -> r{accountField=readMay v})
account2field = do
string "account2-field"
many1 spacenonewline
v <- restofline
updateState (\r -> r{account2Field=readMay v})
basecurrency = do
choice [string "base-currency", string "currency"]
many1 spacenonewline
v <- restofline
updateState (\r -> r{baseCurrency=Just v})
baseaccount = do
string "base-account"
many1 spacenonewline
v <- accountname
optional newline
updateState (\r -> r{baseAccount=v})
accountrule :: GenParser Char CsvRules AccountRule
accountrule = do
many blankorcommentline
pats <- many1 matchreplacepattern
guard $ length pats >= 2
let pats' = init pats
acct = either (fail.show) id $ runParser accountname () "" $ fst $ last pats
many blankorcommentline
return (pats',acct)
<?> "account rule"
blankline = many spacenonewline >> newline >> return () <?> "blank line"
commentchar = oneOf ";#"
commentline = many spacenonewline >> commentchar >> restofline >> return () <?> "comment line"
blankorcommentline = choice' [blankline, commentline]
matchreplacepattern = do
notFollowedBy commentchar
matchpat <- many1 (noneOf "=\n")
replpat <- optionMaybe $ do {char '='; many $ noneOf "\n"}
newline
return (matchpat,replpat)
validateRules :: CsvRules -> Either String CsvRules
validateRules rules =
let hasAmount = isJust $ amountField rules
hasIn = isJust $ amountInField rules
hasOut = isJust $ amountOutField rules
in case (hasAmount, hasIn, hasOut) of
(True, True, _) -> Left "Don't specify amount-in-field when specifying amount-field"
(True, _, True) -> Left "Don't specify amount-out-field when specifying amount-field"
(_, False, True) -> Left "Please specify amount-in-field when specifying amount-out-field"
(_, True, False) -> Left "Please specify amount-out-field when specifying amount-in-field"
(False, False, False) -> Left "Please specify either amount-field, or amount-in-field and amount-out-field"
_ -> Right rules
formatD :: CsvRecord -> Bool -> Maybe Int -> Maybe Int -> HledgerFormatField -> String
formatD record leftJustified min max f = case f of
FieldNo n -> maybe "" show $ atMay record n
AccountField -> ""
DepthSpacerField -> ""
TotalField -> ""
DefaultDateField -> ""
DescriptionField -> ""
where
show = formatValue leftJustified min max
formatDescription :: CsvRecord -> [FormatString] -> String
formatDescription _ [] = ""
formatDescription record (f:fs) = s ++ (formatDescription record fs)
where s = case f of
FormatLiteral l -> l
FormatField leftJustified min max field -> formatD record leftJustified min max field
transactionFromCsvRecord :: CsvRules -> CsvRecord -> Transaction
transactionFromCsvRecord rules fields =
let
date = parsedate $ normaliseDate (dateFormat rules) $ maybe "1900/1/1" (atDef "" fields) (dateField rules)
effectivedate = do idx <- effectiveDateField rules
return $ parsedate $ normaliseDate (dateFormat rules) $ (atDef "" fields) idx
status = maybe False (null . strip . (atDef "" fields)) (statusField rules)
code = maybe "" (atDef "" fields) (codeField rules)
desc = formatDescription fields (descriptionField rules)
comment = ""
precomment = ""
baseacc = maybe (baseAccount rules) (atDef "" fields) (accountField rules)
amountstr = getAmount rules fields
amountstr' = strnegate amountstr where strnegate ('-':s) = s
strnegate s = '-':s
currency = maybe (fromMaybe "" $ baseCurrency rules) (atDef "" fields) (currencyField rules)
amountstr'' = currency ++ amountstr'
amountparse = runParser amount nullctx "" amountstr''
a = either (const nullmixedamt) id amountparse
baseamount = costOfMixedAmount a
unknownacct | (readDef 0 amountstr' :: Double) < 0 = "income:unknown"
| otherwise = "expenses:unknown"
(acct',newdesc) = identify (accountRules rules) unknownacct desc
acct = maybe acct' (atDef "" fields) (account2Field rules)
t = Transaction {
tdate=date,
teffectivedate=effectivedate,
tstatus=status,
tcode=code,
tdescription=newdesc,
tcomment=comment,
tpreceding_comment_lines=precomment,
ttags=[],
tpostings=[
Posting {
pstatus=False,
paccount=acct,
pamount=a,
pcomment="",
ptype=RegularPosting,
ptags=[],
ptransaction=Just t
},
Posting {
pstatus=False,
paccount=baseacc,
pamount=(baseamount),
pcomment="",
ptype=RegularPosting,
ptags=[],
ptransaction=Just t
}
]
}
in t
normaliseDate :: Maybe String
-> String -> String
normaliseDate mb_user_format s =
let parsewith = flip (parseTime defaultTimeLocale) s in
maybe (error' $ "could not parse \""++s++"\" as a date, consider adding a date-format directive or upgrading")
showDate $
firstJust $ (map parsewith $
maybe [] (:[]) mb_user_format
)
++ [
parseTime defaultTimeLocale "%Y/%m/%e" s
,parseTime defaultTimeLocale "%Y-%m-%e" s
,parseTime defaultTimeLocale "%m/%e/%Y" s
,parseTime defaultTimeLocale "%m-%e-%Y" s
,parseTime defaultTimeLocale "%Y/%m/%e" (take 5 s ++ "0" ++ drop 5 s)
,parseTime defaultTimeLocale "%Y-%m-%e" (take 5 s ++ "0" ++ drop 5 s)
,parseTime defaultTimeLocale "%m/%e/%Y" ('0':s)
,parseTime defaultTimeLocale "%m-%e-%Y" ('0':s)
]
identify :: [AccountRule] -> String -> String -> (String,String)
identify rules defacct desc | null matchingrules = (defacct,desc)
| otherwise = (acct,newdesc)
where
matchingrules = filter ismatch rules :: [AccountRule]
where ismatch = any ((`regexMatchesCI` desc) . fst) . fst
(prs,acct) = head matchingrules
p_ms_r = filter (\(_,m,_) -> m) $ map (\(p,r) -> (p, p `regexMatchesCI` desc, r)) prs
(p,_,r) = head p_ms_r
newdesc = case r of Just repl -> regexReplaceCI p repl desc
Nothing -> desc
getAmount :: CsvRules -> CsvRecord -> String
getAmount rules fields = case amountField rules of
Just f -> maybe "" (atDef "" fields) $ Just f
Nothing ->
case (i, o) of
(x, "") -> x
("", x) -> "-"++x
p -> error' $ "using amount-in-field and amount-out-field, found a value in both fields: "++show p
where
i = maybe "" (atDef "" fields) (amountInField rules)
o = maybe "" (atDef "" fields) (amountOutField rules)
tests_Hledger_Read_CsvReader = TestList (test_parser ++ test_description_parsing)
test_description_parsing = [
"description-field 1" ~: assertParseDescription "description-field 1\n" [FormatField False Nothing Nothing (FieldNo 1)]
, "description-field 1 " ~: assertParseDescription "description-field 1 \n" [FormatField False Nothing Nothing (FieldNo 1)]
, "description-field %(1)" ~: assertParseDescription "description-field %(1)\n" [FormatField False Nothing Nothing (FieldNo 1)]
, "description-field %(1)/$(2)" ~: assertParseDescription "description-field %(1)/%(2)\n" [
FormatField False Nothing Nothing (FieldNo 1)
, FormatLiteral "/"
, FormatField False Nothing Nothing (FieldNo 2)
]
]
where
assertParseDescription string expected = do assertParseEqual (parseDescription string) (nullrules {descriptionField = expected})
parseDescription :: String -> Either ParseError CsvRules
parseDescription x = runParser descriptionfieldWrapper nullrules "(unknown)" x
descriptionfieldWrapper :: GenParser Char CsvRules CsvRules
descriptionfieldWrapper = do
descriptionfield
r <- getState
return r
test_parser = [
"convert rules parsing: empty file" ~: do
assertParseEqual (parseCsvRules "unknown" "") nullrules
,"convert rules parsing: accountrule" ~: do
assertParseEqual (parseWithCtx nullrules accountrule "A\na\n")
([("A",Nothing)], "a")
,"convert rules parsing: trailing comments" ~: do
assertParse (parseWithCtx nullrules csvrulesfile "A\na\n# \n#\n")
,"convert rules parsing: trailing blank lines" ~: do
assertParse (parseWithCtx nullrules csvrulesfile "A\na\n\n \n")
]