{-| A reader for the CSV data format. Uses an extra rules file () to help interpret the data. Example: @ \"2012\/3\/22\",\"something\",\"10.00\" \"2012\/3\/23\",\"another\",\"5.50\" @ and rules file: @ date-field 0 description-field 1 amount-field 2 base-account assets:bank:checking SAVINGS assets:bank:savings @ -} module Hledger.Read.CsvReader ( -- * Reader reader, -- * Misc. CsvRecord, nullrules, rulesFileFor, parseCsvRulesFile, transactionFromCsvRecord, -- * Tests tests_Hledger_Read_CsvReader ) where import Control.Exception hiding (try) import Control.Monad import Control.Monad.Error -- import Test.HUnit import Data.List import Data.Maybe import Data.Ord import Data.Time.Format (parseTime) import Safe import System.Directory (doesFileExist) import System.FilePath import System.IO (stderr) import System.Locale (defaultTimeLocale) import Test.HUnit import Text.CSV (parseCSV, CSV) import Text.ParserCombinators.Parsec hiding (parse) import Text.ParserCombinators.Parsec.Error import Text.ParserCombinators.Parsec.Pos import Text.Printf (hPrintf) import Hledger.Data import Prelude hiding (getContents) import Hledger.Utils.UTF8IOCompat (getContents) import Hledger.Utils import Hledger.Data.FormatStrings as FormatStrings import Hledger.Read.JournalReader (accountname, amount) reader :: Reader reader = Reader format detect parse format :: String format = "csv" -- | Does the given file path and data look like CSV ? detect :: FilePath -> String -> Bool detect f _ = takeExtension f == '.':format -- | Parse and post-process a "Journal" from CSV data, or give an error. -- XXX currently ignores the string and reads from the file path parse :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal parse rulesfile f s = -- trace ("running "++format++" reader") $ do r <- liftIO $ readJournalFromCsv rulesfile f s case r of Left e -> throwError e Right j -> return j nullrules = CsvRules { dateField=Nothing, dateFormat=Nothing, statusField=Nothing, codeField=Nothing, descriptionField=[], amountField=Nothing, amountInField=Nothing, amountOutField=Nothing, currencyField=Nothing, baseCurrency=Nothing, accountField=Nothing, account2Field=Nothing, effectiveDateField=Nothing, baseAccount="unknown", accountRules=[] } type CsvRecord = [String] -- | Read a Journal from the given CSV data (and filename, used for error -- messages), or return an error. Proceed as follows: -- @ -- 1. parse the CSV data -- 2. identify the name of a file specifying conversion rules: either use -- the name provided, derive it from the CSV filename, or raise an error -- if the CSV filename is -. -- 3. auto-create the rules file with default rules if it doesn't exist -- 4. parse the rules file -- 5. convert the CSV records to a journal using the rules -- @ readJournalFromCsv :: Maybe FilePath -> FilePath -> String -> IO (Either String Journal) readJournalFromCsv Nothing "-" _ = return $ Left "please use --rules-file when converting stdin" readJournalFromCsv mrulesfile csvfile csvdata = handle (\e -> return $ Left $ show (e :: IOException)) $ do csvparse <- parseCsv csvfile csvdata let rs = case csvparse of Left e -> throw $ userError $ show e Right rs -> filter (/= [""]) rs badrecords = take 1 $ filter ((< 2).length) rs records = case badrecords of [] -> rs (_:_) -> throw $ userError $ "Parse error: at least one CSV record has less than two fields:\n"++(show $ head badrecords) let rulesfile = fromMaybe (rulesFileFor csvfile) mrulesfile created <- records `seq` ensureRulesFileExists rulesfile if created then hPrintf stderr "creating default conversion rules file %s, edit this file for better results\n" rulesfile else hPrintf stderr "using conversion rules file %s\n" rulesfile rules <- liftM (either (throw.userError.show) id) $ parseCsvRulesFile rulesfile let requiredfields = (maxFieldIndex rules + 1) badrecords = take 1 $ filter ((< requiredfields).length) records return $ case badrecords of [] -> Right nulljournal{jtxns=sortBy (comparing tdate) $ map (transactionFromCsvRecord rules) records} (_:_) -> Left $ "Parse error: at least one CSV record does not contain a field referenced by the conversion rules file:\n"++(show $ head badrecords) -- | Ensure there is a conversion rules file at the given path, creating a -- default one if needed and returning True in this case. ensureRulesFileExists :: FilePath -> IO Bool ensureRulesFileExists f = do exists <- doesFileExist f if exists then return False else do -- note Hledger.Utils.UTF8.* do no line ending conversion on windows, -- we currently require unix line endings on all platforms. writeFile f newRulesFileContent return True parseCsv :: FilePath -> String -> IO (Either ParseError CSV) parseCsv path csvdata = case path of "-" -> liftM (parseCSV "(stdin)") getContents _ -> return $ parseCSV path csvdata -- | The highest (0-based) field index referenced in the field -- definitions, or -1 if no fields are defined. maxFieldIndex :: CsvRules -> Int maxFieldIndex r = maximumDef (-1) $ catMaybes [ dateField r ,statusField r ,codeField r ,amountField r ,amountInField r ,amountOutField r ,currencyField r ,accountField r ,account2Field r ,effectiveDateField r ] -- rulesFileFor :: CliOpts -> FilePath -> FilePath -- rulesFileFor CliOpts{rules_file_=Just f} _ = f -- rulesFileFor CliOpts{rules_file_=Nothing} csvfile = replaceExtension csvfile ".rules" rulesFileFor :: FilePath -> FilePath rulesFileFor = flip replaceExtension ".rules" newRulesFileContent :: String newRulesFileContent = let prognameandversion = "hledger" in "# csv conversion rules file generated by " ++ prognameandversion ++ "\n" ++ "# Add rules to this file for more accurate conversion, see\n"++ "# http://hledger.org/MANUAL.html#convert\n" ++ "\n" ++ "base-account assets:bank:checking\n" ++ "date-field 0\n" ++ "description-field 4\n" ++ "amount-field 1\n" ++ "base-currency $\n" ++ "\n" ++ "# account-assigning rules\n" ++ "\n" ++ "SPECTRUM\n" ++ "expenses:health:gym\n" ++ "\n" ++ "ITUNES\n" ++ "BLKBSTR=BLOCKBUSTER\n" ++ "expenses:entertainment\n" ++ "\n" ++ "(TO|FROM) SAVINGS\n" ++ "assets:bank:savings\n" -- rules file parser parseCsvRulesFile :: FilePath -> IO (Either ParseError CsvRules) parseCsvRulesFile f = do s <- readFile f let rules = parseCsvRules f s return $ case rules of Left e -> Left e Right r -> case validateRules r of Left e -> Left $ toParseError e Right r -> Right r where toParseError s = newErrorMessage (Message s) (initialPos "") parseCsvRules :: FilePath -> String -> Either ParseError CsvRules parseCsvRules rulesfile s = runParser csvrulesfile nullrules{baseAccount=takeBaseName rulesfile} rulesfile s csvrulesfile :: GenParser Char CsvRules CsvRules csvrulesfile = do many blankorcommentline many definitions r <- getState ars <- many accountrule many blankorcommentline eof return r{accountRules=ars} definitions :: GenParser Char CsvRules () definitions = do choice' [ datefield ,dateformat ,statusfield ,codefield ,descriptionfield ,amountfield ,amountinfield ,amountoutfield ,currencyfield ,accountfield ,account2field ,effectivedatefield ,basecurrency ,baseaccount ,commentline ] "definition" return () datefield = do string "date-field" many1 spacenonewline v <- restofline updateState (\r -> r{dateField=readMay v}) effectivedatefield = do string "effective-date-field" many1 spacenonewline v <- restofline updateState (\r -> r{effectiveDateField=readMay v}) dateformat = do string "date-format" many1 spacenonewline v <- restofline updateState (\r -> r{dateFormat=Just v}) codefield = do string "code-field" many1 spacenonewline v <- restofline updateState (\r -> r{codeField=readMay v}) statusfield = do string "status-field" many1 spacenonewline v <- restofline updateState (\r -> r{statusField=readMay v}) descriptionFieldValue :: GenParser Char st [FormatString] descriptionFieldValue = do -- try (fieldNo <* spacenonewline) try fieldNo <|> formatStrings where fieldNo = many1 digit >>= \x -> return [FormatField False Nothing Nothing $ FieldNo $ read x] descriptionfield = do string "description-field" many1 spacenonewline formatS <- descriptionFieldValue restofline updateState (\x -> x{descriptionField=formatS}) amountfield = do string "amount-field" many1 spacenonewline v <- restofline x <- updateState (\r -> r{amountField=readMay v}) return x amountinfield = do choice [string "amount-in-field", string "in-field"] many1 spacenonewline v <- restofline updateState (\r -> r{amountInField=readMay v}) amountoutfield = do choice [string "amount-out-field", string "out-field"] many1 spacenonewline v <- restofline updateState (\r -> r{amountOutField=readMay v}) currencyfield = do string "currency-field" many1 spacenonewline v <- restofline updateState (\r -> r{currencyField=readMay v}) accountfield = do string "account-field" many1 spacenonewline v <- restofline updateState (\r -> r{accountField=readMay v}) account2field = do string "account2-field" many1 spacenonewline v <- restofline updateState (\r -> r{account2Field=readMay v}) basecurrency = do choice [string "base-currency", string "currency"] many1 spacenonewline v <- restofline updateState (\r -> r{baseCurrency=Just v}) baseaccount = do string "base-account" many1 spacenonewline v <- accountname optional newline updateState (\r -> r{baseAccount=v}) accountrule :: GenParser Char CsvRules AccountRule accountrule = do many blankorcommentline pats <- many1 matchreplacepattern guard $ length pats >= 2 let pats' = init pats acct = either (fail.show) id $ runParser accountname () "" $ fst $ last pats many blankorcommentline return (pats',acct) "account rule" blankline = many spacenonewline >> newline >> return () "blank line" commentchar = oneOf ";#" commentline = many spacenonewline >> commentchar >> restofline >> return () "comment line" blankorcommentline = choice' [blankline, commentline] matchreplacepattern = do notFollowedBy commentchar matchpat <- many1 (noneOf "=\n") replpat <- optionMaybe $ do {char '='; many $ noneOf "\n"} newline return (matchpat,replpat) validateRules :: CsvRules -> Either String CsvRules validateRules rules = let hasAmount = isJust $ amountField rules hasIn = isJust $ amountInField rules hasOut = isJust $ amountOutField rules in case (hasAmount, hasIn, hasOut) of (True, True, _) -> Left "Don't specify amount-in-field when specifying amount-field" (True, _, True) -> Left "Don't specify amount-out-field when specifying amount-field" (_, False, True) -> Left "Please specify amount-in-field when specifying amount-out-field" (_, True, False) -> Left "Please specify amount-out-field when specifying amount-in-field" (False, False, False) -> Left "Please specify either amount-field, or amount-in-field and amount-out-field" _ -> Right rules -- csv record conversion formatD :: CsvRecord -> Bool -> Maybe Int -> Maybe Int -> HledgerFormatField -> String formatD record leftJustified min max f = case f of FieldNo n -> maybe "" show $ atMay record n -- Some of these might in theory in read from fields AccountField -> "" DepthSpacerField -> "" TotalField -> "" DefaultDateField -> "" DescriptionField -> "" where show = formatValue leftJustified min max formatDescription :: CsvRecord -> [FormatString] -> String formatDescription _ [] = "" formatDescription record (f:fs) = s ++ (formatDescription record fs) where s = case f of FormatLiteral l -> l FormatField leftJustified min max field -> formatD record leftJustified min max field transactionFromCsvRecord :: CsvRules -> CsvRecord -> Transaction transactionFromCsvRecord rules fields = let date = parsedate $ normaliseDate (dateFormat rules) $ maybe "1900/1/1" (atDef "" fields) (dateField rules) effectivedate = do idx <- effectiveDateField rules return $ parsedate $ normaliseDate (dateFormat rules) $ (atDef "" fields) idx status = maybe False (null . strip . (atDef "" fields)) (statusField rules) code = maybe "" (atDef "" fields) (codeField rules) desc = formatDescription fields (descriptionField rules) comment = "" precomment = "" baseacc = maybe (baseAccount rules) (atDef "" fields) (accountField rules) amountstr = getAmount rules fields amountstr' = strnegate amountstr where strnegate ('-':s) = s strnegate s = '-':s currency = maybe (fromMaybe "" $ baseCurrency rules) (atDef "" fields) (currencyField rules) amountstr'' = currency ++ amountstr' amountparse = runParser amount nullctx "" amountstr'' a = either (const nullmixedamt) id amountparse -- Using costOfMixedAmount here to allow complex costs like "10 GBP @@ 15 USD". -- Aim is to have "10 GBP @@ 15 USD" applied to account "acct", but have "-15USD" applied to "baseacct" baseamount = costOfMixedAmount a unknownacct | (readDef 0 amountstr' :: Double) < 0 = "income:unknown" | otherwise = "expenses:unknown" (acct',newdesc) = identify (accountRules rules) unknownacct desc acct = maybe acct' (atDef "" fields) (account2Field rules) t = Transaction { tdate=date, teffectivedate=effectivedate, tstatus=status, tcode=code, tdescription=newdesc, tcomment=comment, tpreceding_comment_lines=precomment, ttags=[], tpostings=[ Posting { pstatus=False, paccount=acct, pamount=a, pcomment="", ptype=RegularPosting, ptags=[], ptransaction=Just t }, Posting { pstatus=False, paccount=baseacc, pamount=(-baseamount), pcomment="", ptype=RegularPosting, ptags=[], ptransaction=Just t } ] } in t -- | Convert some date string with unknown format to YYYY/MM/DD. normaliseDate :: Maybe String -- ^ User-supplied date format: this should be tried in preference to all others -> String -> String normaliseDate mb_user_format s = let parsewith = flip (parseTime defaultTimeLocale) s in maybe (error' $ "could not parse \""++s++"\" as a date, consider adding a date-format directive or upgrading") showDate $ firstJust $ (map parsewith $ maybe [] (:[]) mb_user_format -- the - modifier requires time-1.2.0.5, released -- in 2011/5, so for now we emulate it for wider -- compatibility. time < 1.2.0.5 also has a buggy -- %y which we don't do anything about. -- ++ [ -- "%Y/%m/%d" -- ,"%Y/%-m/%-d" -- ,"%Y-%m-%d" -- ,"%Y-%-m-%-d" -- ,"%m/%d/%Y" -- ,"%-m/%-d/%Y" -- ,"%m-%d-%Y" -- ,"%-m-%-d-%Y" -- ] ) ++ [ parseTime defaultTimeLocale "%Y/%m/%e" s ,parseTime defaultTimeLocale "%Y-%m-%e" s ,parseTime defaultTimeLocale "%m/%e/%Y" s ,parseTime defaultTimeLocale "%m-%e-%Y" s ,parseTime defaultTimeLocale "%Y/%m/%e" (take 5 s ++ "0" ++ drop 5 s) ,parseTime defaultTimeLocale "%Y-%m-%e" (take 5 s ++ "0" ++ drop 5 s) ,parseTime defaultTimeLocale "%m/%e/%Y" ('0':s) ,parseTime defaultTimeLocale "%m-%e-%Y" ('0':s) ] -- | Apply account matching rules to a transaction description to obtain -- the most appropriate account and a new description. identify :: [AccountRule] -> String -> String -> (String,String) identify rules defacct desc | null matchingrules = (defacct,desc) | otherwise = (acct,newdesc) where matchingrules = filter ismatch rules :: [AccountRule] where ismatch = any ((`regexMatchesCI` desc) . fst) . fst (prs,acct) = head matchingrules p_ms_r = filter (\(_,m,_) -> m) $ map (\(p,r) -> (p, p `regexMatchesCI` desc, r)) prs (p,_,r) = head p_ms_r newdesc = case r of Just repl -> regexReplaceCI p repl desc Nothing -> desc getAmount :: CsvRules -> CsvRecord -> String getAmount rules fields = case amountField rules of Just f -> maybe "" (atDef "" fields) $ Just f Nothing -> case (i, o) of (x, "") -> x ("", x) -> "-"++x p -> error' $ "using amount-in-field and amount-out-field, found a value in both fields: "++show p where i = maybe "" (atDef "" fields) (amountInField rules) o = maybe "" (atDef "" fields) (amountOutField rules) tests_Hledger_Read_CsvReader = TestList (test_parser ++ test_description_parsing) test_description_parsing = [ "description-field 1" ~: assertParseDescription "description-field 1\n" [FormatField False Nothing Nothing (FieldNo 1)] , "description-field 1 " ~: assertParseDescription "description-field 1 \n" [FormatField False Nothing Nothing (FieldNo 1)] , "description-field %(1)" ~: assertParseDescription "description-field %(1)\n" [FormatField False Nothing Nothing (FieldNo 1)] , "description-field %(1)/$(2)" ~: assertParseDescription "description-field %(1)/%(2)\n" [ FormatField False Nothing Nothing (FieldNo 1) , FormatLiteral "/" , FormatField False Nothing Nothing (FieldNo 2) ] ] where assertParseDescription string expected = do assertParseEqual (parseDescription string) (nullrules {descriptionField = expected}) parseDescription :: String -> Either ParseError CsvRules parseDescription x = runParser descriptionfieldWrapper nullrules "(unknown)" x descriptionfieldWrapper :: GenParser Char CsvRules CsvRules descriptionfieldWrapper = do descriptionfield r <- getState return r test_parser = [ "convert rules parsing: empty file" ~: do -- let assertMixedAmountParse parseresult mixedamount = -- (either (const "parse error") showMixedAmountDebug parseresult) ~?= (showMixedAmountDebug mixedamount) assertParseEqual (parseCsvRules "unknown" "") nullrules ,"convert rules parsing: accountrule" ~: do assertParseEqual (parseWithCtx nullrules accountrule "A\na\n") -- leading blank line required ([("A",Nothing)], "a") ,"convert rules parsing: trailing comments" ~: do assertParse (parseWithCtx nullrules csvrulesfile "A\na\n# \n#\n") ,"convert rules parsing: trailing blank lines" ~: do assertParse (parseWithCtx nullrules csvrulesfile "A\na\n\n \n") -- not supported -- ,"convert rules parsing: no final newline" ~: do -- assertParse (parseWithCtx nullrules csvrulesfile "A\na") -- assertParse (parseWithCtx nullrules csvrulesfile "A\na\n# \n#") -- assertParse (parseWithCtx nullrules csvrulesfile "A\na\n\n ") -- (nullrules{ -- -- dateField=Maybe FieldPosition, -- -- statusField=Maybe FieldPosition, -- -- codeField=Maybe FieldPosition, -- -- descriptionField=Maybe FieldPosition, -- -- amountField=Maybe FieldPosition, -- -- currencyField=Maybe FieldPosition, -- -- baseCurrency=Maybe String, -- -- baseAccount=AccountName, -- accountRules=[ -- ([("A",Nothing)], "a") -- ] -- }) ]