module Hledger.Cli.Commands.Convert where
import Hledger.Cli.Options (Opt(Debug))
import Hledger.Cli.Version (versionstr)
import Hledger.Data.Types (Journal,AccountName,Transaction(..),Posting(..),PostingType(..))
import Hledger.Data.Utils (strip, spacenonewline, restofline, parseWithCtx, assertParse, assertParseEqual, error')
import Hledger.Read.Common (emptyCtx)
import Hledger.Read.Journal (someamount,ledgeraccountname)
import Hledger.Data.Amount (nullmixedamt)
import Safe (atDef, maximumDef)
import System.IO (stderr)
import Text.CSV (parseCSVFromFile, printCSV)
import Text.Printf (hPrintf)
import Text.RegexPR (matchRegexPR, gsubRegexPR)
import Data.Maybe
import Hledger.Data.Dates (firstJust, showDate, parsedate)
import System.Locale (defaultTimeLocale)
import Data.Time.Format (parseTime)
import Control.Monad (when, guard, liftM)
import Safe (readDef, readMay)
import System.Directory (doesFileExist)
import System.Exit (exitFailure)
import System.FilePath (takeBaseName, replaceExtension)
import Text.ParserCombinators.Parsec
import Test.HUnit
data CsvRules = CsvRules {
dateField :: Maybe FieldPosition,
statusField :: Maybe FieldPosition,
codeField :: Maybe FieldPosition,
descriptionField :: Maybe FieldPosition,
amountField :: Maybe FieldPosition,
currencyField :: Maybe FieldPosition,
baseCurrency :: Maybe String,
baseAccount :: AccountName,
accountRules :: [AccountRule]
} deriving (Show, Eq)
nullrules = CsvRules {
dateField=Nothing,
statusField=Nothing,
codeField=Nothing,
descriptionField=Nothing,
amountField=Nothing,
currencyField=Nothing,
baseCurrency=Nothing,
baseAccount="unknown",
accountRules=[]
}
type FieldPosition = Int
type AccountRule = (
[(String, Maybe String)]
,AccountName
)
type CsvRecord = [String]
convert :: [Opt] -> [String] -> Journal -> IO ()
convert opts args _ = do
when (null args) $ error' "please specify a csv data file."
let csvfile = head args
csvparse <- parseCSVFromFile csvfile
let records = case csvparse of
Left e -> error' $ show e
Right rs -> reverse $ filter (/= [""]) rs
let debug = Debug `elem` opts
rulesfile = rulesFileFor csvfile
exists <- doesFileExist rulesfile
if (not exists) then do
hPrintf stderr "creating conversion rules file %s, edit this file for better results\n" rulesfile
writeFile rulesfile initialRulesFileContent
else
hPrintf stderr "using conversion rules file %s\n" rulesfile
rules <- liftM (either (error'.show) id) $ parseCsvRulesFile rulesfile
when debug $ hPrintf stderr "rules: %s\n" (show rules)
let requiredfields = max 2 (maxFieldIndex rules + 1)
badrecords = take 1 $ filter ((< requiredfields).length) records
if null badrecords
then mapM_ (printTxn debug rules) records
else do
hPrintf stderr (unlines [
"Warning, at least one CSV record does not contain a field referenced by the"
,"conversion rules file, or has less than two fields. Are you converting a"
,"valid CSV file ? First bad record:\n%s"
]) (show $ head badrecords)
exitFailure
maxFieldIndex :: CsvRules -> Int
maxFieldIndex r = maximumDef (1) $ catMaybes [
dateField r
,statusField r
,codeField r
,descriptionField r
,amountField r
,currencyField r
]
rulesFileFor :: FilePath -> FilePath
rulesFileFor csvfile = replaceExtension csvfile ".rules"
initialRulesFileContent :: String
initialRulesFileContent =
"# csv conversion rules file generated by hledger "++versionstr++"\n" ++
"# Add rules to this file for more accurate conversion, see\n"++
"# http://hledger.org/MANUAL.html#convert\n" ++
"\n" ++
"base-account assets:bank:checking\n" ++
"date-field 0\n" ++
"description-field 4\n" ++
"amount-field 1\n" ++
"currency $\n" ++
"\n" ++
"# account-assigning rules\n" ++
"\n" ++
"SPECTRUM\n" ++
"expenses:health:gym\n" ++
"\n" ++
"ITUNES\n" ++
"BLKBSTR=BLOCKBUSTER\n" ++
"expenses:entertainment\n" ++
"\n" ++
"(TO|FROM) SAVINGS\n" ++
"assets:bank:savings\n"
parseCsvRulesFile :: FilePath -> IO (Either ParseError CsvRules)
parseCsvRulesFile f = do
s <- readFile f
return $ parseCsvRules f s
parseCsvRules :: FilePath -> String -> Either ParseError CsvRules
parseCsvRules rulesfile s = runParser csvrulesfile nullrules{baseAccount=takeBaseName rulesfile} rulesfile s
csvrulesfile :: GenParser Char CsvRules CsvRules
csvrulesfile = do
many blankorcommentline
many definitions
r <- getState
ars <- many accountrule
many blankorcommentline
eof
return r{accountRules=ars}
choice' parsers = choice $ map try (init parsers) ++ [last parsers]
definitions :: GenParser Char CsvRules ()
definitions = do
choice' [
datefield
,statusfield
,codefield
,descriptionfield
,amountfield
,currencyfield
,basecurrency
,baseaccount
,commentline
] <?> "definition"
return ()
datefield = do
string "date-field"
many1 spacenonewline
v <- restofline
r <- getState
setState r{dateField=readMay v}
codefield = do
string "code-field"
many1 spacenonewline
v <- restofline
r <- getState
setState r{codeField=readMay v}
statusfield = do
string "status-field"
many1 spacenonewline
v <- restofline
r <- getState
setState r{statusField=readMay v}
descriptionfield = do
string "description-field"
many1 spacenonewline
v <- restofline
r <- getState
setState r{descriptionField=readMay v}
amountfield = do
string "amount-field"
many1 spacenonewline
v <- restofline
r <- getState
setState r{amountField=readMay v}
currencyfield = do
string "currency-field"
many1 spacenonewline
v <- restofline
r <- getState
setState r{currencyField=readMay v}
basecurrency = do
string "currency"
many1 spacenonewline
v <- restofline
r <- getState
setState r{baseCurrency=Just v}
baseaccount = do
string "base-account"
many1 spacenonewline
v <- ledgeraccountname
optional newline
r <- getState
setState r{baseAccount=v}
accountrule :: GenParser Char CsvRules AccountRule
accountrule = do
many blankorcommentline
pats <- many1 matchreplacepattern
guard $ length pats >= 2
let pats' = init pats
acct = either (fail.show) id $ runParser ledgeraccountname () "" $ fst $ last pats
many blankorcommentline
return (pats',acct)
<?> "account rule"
blanklines = many1 blankline >> return ()
blankline = many spacenonewline >> newline >> return () <?> "blank line"
commentchar = oneOf ";#"
commentline = many spacenonewline >> commentchar >> restofline >> return () <?> "comment line"
blankorcommentline = choice' [blankline, commentline]
matchreplacepattern = do
notFollowedBy commentchar
matchpat <- many1 (noneOf "=\n")
replpat <- optionMaybe $ do {char '='; many $ noneOf "\n"}
newline
return (matchpat,replpat)
printTxn :: Bool -> CsvRules -> CsvRecord -> IO ()
printTxn debug rules rec = do
when debug $ hPrintf stderr "record: %s" (printCSV [rec])
putStr $ show $ transactionFromCsvRecord rules rec
transactionFromCsvRecord :: CsvRules -> CsvRecord -> Transaction
transactionFromCsvRecord rules fields =
let
date = parsedate $ normaliseDate $ maybe "1900/1/1" (atDef "" fields) (dateField rules)
status = maybe False (null . strip . (atDef "" fields)) (statusField rules)
code = maybe "" (atDef "" fields) (codeField rules)
desc = maybe "" (atDef "" fields) (descriptionField rules)
comment = ""
precomment = ""
amountstr = maybe "" (atDef "" fields) (amountField rules)
amountstr' = strnegate amountstr where strnegate ('-':s) = s
strnegate s = '-':s
currency = maybe (fromMaybe "" $ baseCurrency rules) (atDef "" fields) (currencyField rules)
amountstr'' = currency ++ amountstr'
amountparse = runParser someamount emptyCtx "" amountstr''
amount = either (const nullmixedamt) id amountparse
unknownacct | (readDef 0 amountstr' :: Double) < 0 = "income:unknown"
| otherwise = "expenses:unknown"
(acct,newdesc) = identify (accountRules rules) unknownacct desc
t = Transaction {
tdate=date,
teffectivedate=Nothing,
tstatus=status,
tcode=code,
tdescription=newdesc,
tcomment=comment,
tpreceding_comment_lines=precomment,
tpostings=[
Posting {
pstatus=False,
paccount=acct,
pamount=amount,
pcomment="",
ptype=RegularPosting,
ptransaction=Just t
},
Posting {
pstatus=False,
paccount=baseAccount rules,
pamount=(amount),
pcomment="",
ptype=RegularPosting,
ptransaction=Just t
}
]
}
in t
normaliseDate :: String -> String
normaliseDate s = maybe "0000/00/00" showDate $
firstJust
[parseTime defaultTimeLocale "%Y/%m/%e" s
,parseTime defaultTimeLocale "%Y/%m/%e" (take 5 s ++ "0" ++ drop 5 s)
,parseTime defaultTimeLocale "%Y-%m-%e" s
,parseTime defaultTimeLocale "%Y-%m-%e" (take 5 s ++ "0" ++ drop 5 s)
,parseTime defaultTimeLocale "%m/%e/%Y" s
,parseTime defaultTimeLocale "%m/%e/%Y" ('0':s)
,parseTime defaultTimeLocale "%m-%e-%Y" s
,parseTime defaultTimeLocale "%m-%e-%Y" ('0':s)
]
identify :: [AccountRule] -> String -> String -> (String,String)
identify rules defacct desc | null matchingrules = (defacct,desc)
| otherwise = (acct,newdesc)
where
matchingrules = filter ismatch rules :: [AccountRule]
where ismatch = any (isJust . flip matchRegexPR (caseinsensitive desc) . fst) . fst
(prs,acct) = head matchingrules
p_ms_r = filter (\(_,m,_) -> isJust m) $ map (\(p,r) -> (p, matchRegexPR (caseinsensitive p) desc, r)) prs
(p,_,r) = head p_ms_r
newdesc = case r of Just rpat -> gsubRegexPR (caseinsensitive p) rpat desc
Nothing -> desc
caseinsensitive = ("(?i)"++)
tests_Convert = TestList [
"convert rules parsing: empty file" ~: do
assertParseEqual (parseCsvRules "unknown" "") nullrules
,"convert rules parsing: accountrule" ~: do
assertParseEqual (parseWithCtx nullrules accountrule "A\na\n")
([("A",Nothing)], "a")
,"convert rules parsing: trailing comments" ~: do
assertParse (parseWithCtx nullrules csvrulesfile "A\na\n# \n#\n")
,"convert rules parsing: trailing blank lines" ~: do
assertParse (parseWithCtx nullrules csvrulesfile "A\na\n\n \n")
]