diff --git a/Commands/Convert.hs b/Commands/Convert.hs index 99cf31539..f1183c560 100644 --- a/Commands/Convert.hs +++ b/Commands/Convert.hs @@ -1,98 +1,249 @@ {-| - Convert account data in CSV format (eg downloaded from a bank) to ledger format, and print it on stdout. See the manual for more details. - -} module Commands.Convert where -import Data.List.Split (splitOn) -import Options -- (Opt,Debug) -import Ledger.Types (Ledger,AccountName) -import Ledger.Utils (strip) -import System.IO (stderr, hPutStr) +import Options (Opt(Debug)) +import Ledger.Types (Ledger,AccountName,LedgerTransaction(..),Posting(..),PostingType(..)) +import Ledger.Utils (strip, spacenonewline, restofline) +import Ledger.Parse (someamount, emptyCtx, ledgeraccountname) +import Ledger.Amount (nullmixedamt) +import System.IO (stderr) import Text.CSV (parseCSVFromFile, printCSV) -import Text.Printf (printf) +import Text.Printf (hPrintf) import Text.RegexPR (matchRegexPR) import Data.Maybe -import Ledger.Dates (firstJust, showDate) +import Ledger.Dates (firstJust, showDate, parsedate) import Locale (defaultTimeLocale) import Data.Time.Format (parseTime) -import Control.Monad (when) -import Safe (readMay, readDef) +import Control.Monad (when, guard) +import Safe (readDef, readMay) +import System.FilePath.Posix (takeBaseName) +import Text.ParserCombinators.Parsec convert :: [Opt] -> [String] -> Ledger -> IO () convert opts args _ = do - when (length args /= 3) (error "please specify a csv file, base account, and import rules file.") - let [csvfile,baseacct,rulesfile] = args - rulesstr <- readFile rulesfile - (fieldpositions,rules) <- parseRules rulesstr - parse <- parseCSVFromFile csvfile - let records = case parse of + when (length args /= 2) (error "please specify a csv data file and conversion rules file.") + let debug = Debug `elem` opts + [csvfile,rulesfile] = args + csvparse <- parseCSVFromFile csvfile + let records = case csvparse of Left e -> error $ show e - Right rs -> reverse rs - mapM_ (print_ledger_txn (Debug `elem` opts) (baseacct,fieldpositions,rules)) records + Right rs -> reverse $ filter (/= [""]) rs + rulesstr <- readFile rulesfile + let rules = case parseCsvRules (takeBaseName csvfile) rulesstr of + Left e -> error $ show e + Right r -> r + when debug $ hPrintf stderr "using csv conversion rules file %s\n" rulesfile + when debug $ hPrintf stderr "%s\n" (show rules) + mapM_ (printTxn debug rules) records +{- | +A set of data definitions and account-matching patterns sufficient to +convert a particular CSV data file into meaningful ledger transactions. See above. +-} +data CsvRules = CsvRules { + dateField :: Maybe FieldPosition, + statusField :: Maybe FieldPosition, + codeField :: Maybe FieldPosition, + descriptionField :: Maybe FieldPosition, + amountField :: Maybe FieldPosition, + currencyField :: Maybe FieldPosition, + baseCurrency :: Maybe String, + baseAccount :: AccountName, + accountRules :: [AccountRule] +} deriving (Show) -type Rule = ( - [(String, Maybe String)] -- list of patterns and optional replacements - ,AccountName -- account name to use for a matched transaction +nullrules = CsvRules { + dateField=Nothing, + statusField=Nothing, + codeField=Nothing, + descriptionField=Nothing, + amountField=Nothing, + currencyField=Nothing, + baseCurrency=Nothing, + baseAccount="unknown", + accountRules=[] +} + +type FieldPosition = Int + +type AccountRule = ( + [(String, Maybe String)] -- list of regex match patterns with optional replacements + ,AccountName -- account name to use for a transaction matching this rule ) -parseRules :: String -> IO ([Maybe Int],[Rule]) -parseRules s = do - let ls = map strip $ lines s - let paras = splitOn [""] ls - let fieldpositions = map readMay $ splitOn "," $ head $ head paras - let rules = [(map parsePatRepl $ init ls, last ls) | ls <- tail paras] - return (fieldpositions,rules) +type CsvRecord = [String] -parsePatRepl :: String -> (String, Maybe String) -parsePatRepl l = case splitOn "=" l of - (p:r:_) -> (p, Just r) - _ -> (l, Nothing) +-- rules file parser -print_ledger_txn :: Bool -> (String,[Maybe Int],[Rule]) -> [String] -> IO () -print_ledger_txn _ (_,[],_) _ = return () -print_ledger_txn _ (('#':_),_,_) _ = return () -print_ledger_txn debug (baseacct,fieldpositions,rules) csvrecord - | length csvrecord < maximum (map (fromMaybe 0) fieldpositions) + 1 = return () - | otherwise = - do - when debug $ hPutStr stderr $ printCSV [csvrecord] - let date = maybe "" (csvrecord !!) (fieldpositions !! 0) - number = maybe "" (csvrecord !!) (fieldpositions !! 1) - description = maybe "" (csvrecord !!) (fieldpositions !! 2) - amount = maybe "" (csvrecord !!) (fieldpositions !! 3) - amount' = strnegate amount where strnegate ('-':s) = s - strnegate s = '-':s - unknownacct | (readDef 0 amount' :: Double) < 0 = "income:unknown" +parseCsvRules :: String -> String -> Either ParseError CsvRules +parseCsvRules basefilename s = runParser csvrulesP nullrules{baseAccount=basefilename} "" s + +csvrulesP :: GenParser Char CsvRules CsvRules +csvrulesP = do + optional blanklines + many definitions + r <- getState + ars <- many accountrule + optional blanklines + eof + return r{accountRules=ars} + +-- | Real independent parser choice, even when alternative matches share a prefix. +choice' parsers = choice $ map try (init parsers) ++ [last parsers] + +definitions :: GenParser Char CsvRules () +definitions = do + choice' [ + datefield + ,statusfield + ,codefield + ,descriptionfield + ,amountfield + ,currencyfield + ,basecurrency + ,baseaccount + ] "definition" + return () + +datefield = do + string "date-field" + many1 spacenonewline + v <- restofline + r <- getState + setState r{dateField=readMay v} + +codefield = do + string "code-field" + many1 spacenonewline + v <- restofline + r <- getState + setState r{codeField=readMay v} + +statusfield = do + string "status-field" + many1 spacenonewline + v <- restofline + r <- getState + setState r{statusField=readMay v} + +descriptionfield = do + string "description-field" + many1 spacenonewline + v <- restofline + r <- getState + setState r{descriptionField=readMay v} + +amountfield = do + string "amount-field" + many1 spacenonewline + v <- restofline + r <- getState + setState r{amountField=readMay v} + +currencyfield = do + string "currency-field" + many1 spacenonewline + v <- restofline + r <- getState + setState r{currencyField=readMay v} + +basecurrency = do + string "currency" + many1 spacenonewline + v <- restofline + r <- getState + setState r{baseCurrency=Just v} + +baseaccount = do + string "base-account" + many1 spacenonewline + v <- ledgeraccountname + optional newline + r <- getState + setState r{baseAccount=v} + +accountrule :: GenParser Char CsvRules AccountRule +accountrule = do + blanklines + pats <- many1 matchreplacepattern + guard $ length pats >= 2 + let pats' = init pats + acct = either (fail.show) id $ runParser ledgeraccountname () "" $ fst $ last pats + return (pats',acct) + +blanklines = many1 blankline >> return () + +blankline = many spacenonewline >> newline >> return () "blank line" + +matchreplacepattern = do + matchpat <- many1 (noneOf "=\n") + replpat <- optionMaybe $ do {char '='; many $ noneOf "\n"} + newline + return (matchpat,replpat) + +printTxn :: Bool -> CsvRules -> CsvRecord -> IO () +printTxn debug rules rec = do + when debug $ hPrintf stderr "csv: %s" (printCSV [rec]) + putStr $ show $ transactionFromCsvRecord rules rec + +-- csv record conversion + +transactionFromCsvRecord :: CsvRules -> CsvRecord -> LedgerTransaction +transactionFromCsvRecord rules fields = + let + date = parsedate $ normaliseDate $ maybe "1900/1/1" (fields !!) (dateField rules) + status = maybe False (null . strip . (fields !!)) (statusField rules) + code = maybe "" (fields !!) (codeField rules) + desc = maybe "" (fields !!) (descriptionField rules) + comment = "" + precomment = "" + amountstr = maybe "" (fields !!) (amountField rules) + amountstr' = strnegate amountstr where strnegate ('-':s) = s + strnegate s = '-':s + currency = maybe (fromMaybe "" $ baseCurrency rules) (fields !!) (currencyField rules) + amountstr'' = currency ++ amountstr' + amountparse = runParser someamount emptyCtx "" amountstr'' + amount = either (const nullmixedamt) id amountparse + unknownacct | (readDef 0 amountstr' :: Double) < 0 = "income:unknown" | otherwise = "expenses:unknown" - (acct,desc) = choose_acct_desc rules (unknownacct,description) - printf "%s%s %s\n" (fixdate date) (if not (null number) then printf " (%s)" number else "") desc - printf " %-30s %15s\n" acct (printf "$%s" amount' :: String) - printf " %s\n\n" baseacct + (acct,newdesc) = identify (accountRules rules) unknownacct desc + in + LedgerTransaction { + ltdate=date, + lteffectivedate=Nothing, + ltstatus=status, + ltcode=code, + ltdescription=newdesc, + ltcomment=comment, + ltpreceding_comment_lines=precomment, + ltpostings=[ + Posting { + pstatus=False, + paccount=acct, + pamount=amount, + pcomment="", + ptype=RegularPosting + }, + Posting { + pstatus=False, + paccount=baseAccount rules, + pamount=(-amount), + pcomment="", + ptype=RegularPosting + } + ] + } -choose_acct_desc :: [Rule] -> (String,String) -> (String,String) -choose_acct_desc rules (acct,desc) | null matchingrules = (acct,desc) - | otherwise = (a,d) - where - matchingrules = filter ismatch rules :: [Rule] - where ismatch = any (isJust . flip matchregex desc . fst) . fst - (prs,a) = head matchingrules - mrs = filter (isJust . fst) $ map (\(p,r) -> (matchregex p desc, r)) prs - (m,repl) = head mrs - matched = fst $ fst $ fromJust m - d = fromMaybe matched repl - -matchregex = matchRegexPR . ("(?i)" ++) - -fixdate :: String -> String -fixdate s = maybe "0000/00/00" showDate $ +-- | Convert some date string with unknown format to YYYY/MM/DD. +normaliseDate :: String -> String +normaliseDate s = maybe "0000/00/00" showDate $ firstJust [parseTime defaultTimeLocale "%Y/%m/%e" s - -- can't parse a month without leading 0, try adding onee + -- can't parse a month without leading 0, try adding one ,parseTime defaultTimeLocale "%Y/%m/%e" (take 5 s ++ "0" ++ drop 5 s) ,parseTime defaultTimeLocale "%Y-%m-%e" s ,parseTime defaultTimeLocale "%Y-%m-%e" (take 5 s ++ "0" ++ drop 5 s) @@ -102,3 +253,19 @@ fixdate s = maybe "0000/00/00" showDate $ ,parseTime defaultTimeLocale "%m-%e-%Y" ('0':s) ] +-- | Apply account matching rules to a transaction description to obtain +-- the most appropriate account and a new description. +identify :: [AccountRule] -> String -> String -> (String,String) +identify rules defacct desc | null matchingrules = (defacct,desc) + | otherwise = (acct,newdesc) + where + matchingrules = filter ismatch rules :: [AccountRule] + where ismatch = any (isJust . flip matchregex desc . fst) . fst + (prs,acct) = head matchingrules + mrs = filter (isJust . fst) $ map (\(p,r) -> (matchregex p desc, r)) prs + (m,repl) = head mrs + matched = fst $ fst $ fromJust m + newdesc = fromMaybe matched repl + +matchregex = matchRegexPR . ("(?i)" ++) + diff --git a/Options.hs b/Options.hs index f35e2c95c..2cc208afe 100644 --- a/Options.hs +++ b/Options.hs @@ -20,7 +20,7 @@ timeprogname = "hours" usagehdr = "Usage: hledger [OPTIONS] [COMMAND [PATTERNS]]\n" ++ " hours [OPTIONS] [COMMAND [PATTERNS]]\n" ++ - " hledger convert CSVFILE ACCOUNTNAME RULESFILE\n" ++ + " hledger convert CSVFILE RULESFILE\n" ++ "\n" ++ "hledger uses your ~/.ledger or $LEDGER file (or another specified with -f),\n" ++ "while hours uses your ~/.timelog or $TIMELOG file.\n" ++ diff --git a/README b/README index db6fd6fa5..5cb6edf3e 100644 --- a/README +++ b/README @@ -157,26 +157,35 @@ Commands Convert ,,,,,,, -The convert command converts CSV (comma-separated value) files downloaded -from a bank into ledger format. Doing a bulk import in this way can be a -easier than entering each transaction by hand. The downside is you no -longer have your own data with which to detect errors by the bank. +The convert command reads a CSV_ file you have downloaded from your bank, +and prints out the transactions in ledger format, suitable for adding to +your ledger. This can be a lot quicker than entering every transaction by +hand. (The downside is that you are less likely to notice if your bank makes an +error, so keep an eye on them!) -Usage is different from the other commands, typically it looks like this:: +Use it like this:: - hledger convert FILE.csv BASEACCOUNT FILE.rules >FILE.ledger - (review FILE.ledger, then copy and paste into main ledger) + hledger convert FILE.csv FILE.rules >FILE.ledger -Ie convert the csv data and save the output into a similarly named ledger file. -(It's not required, but convenient to use the same base filename for the -csv, rules and output files as above.) +This will convert the csv data in FILE.csv using the conversion hints in +FILE.rules, and save the output into a temporary ledger file. Then you +should review FILE.ledger for problems; update the rules and convert again +if needed; and finally copy/paste transactions which are new into your +main ledger. -BASEACCOUNT is the source account for these transactions, eg -``assets:bank:checking``. +rules file +'''''''''' -FILE.rules contains some rules to help convert the data. Here's an example:: +A .rules file contains some data definitions and some rules for assigning +destination accounts to transactions. Typically you will have one csv file +and corresponding rules file per bank account. Here's an example rules +file for converting the csv download from a Wells Fargo checking account:: - 0,-,4,1 + base-account assets:bank:checking + date-field 0 + description-field 4 + amount-field 1 + currency $ SPECTRUM expenses:health:gym @@ -188,21 +197,33 @@ FILE.rules contains some rules to help convert the data. Here's an example:: (TO|FROM) SAVINGS assets:bank:savings -It must contain: +This says: -- paragraphs separated by one blank line. +- the ledger account corresponding to this csv file is assets:bank:checking +- the first csv field is the date, the second is the amount, the fifth is the description +- prepend a dollar sign to the amount field +- if description contains SPECTRUM (case-insensitive), the transaction is a gym expense +- if description contains ITUNES or BLKBSTR, the transaction is an entertainment expense; also rewrite BLKBSTR as BLOCKBUSTER +- if description contains TO SAVINGS or FROM SAVINGS, the transaction is a savings transfer -- The first paragraph is a single line of four comma-separated fields, - which are numbers indicating the (0-based) csv field positions - corresponding to the transaction date, code/number, description, and amount. - If a field does not exist in the csv, use - to specify a default value. +Notes: -- Other paragraphs consist of one or more regular expression patterns, one - per line, followed by a line specifying the account to use when a - transaction's description matches any of these patterns. Patterns may - optionally have a replacement pattern specified after =, otherwise the - matching part is used. +- Definitions must come first, one per line, all in one paragraph. Each + is a name and a value separated by whitespace. Supported names are: + base-account, date-field, status-field, code-field, description-field, + amount-field, currency-field, currency. All are optional and will + use defaults if not specified. +- The remainder of the file is account-assigning rules. Each is a + paragraph consisting of one or more description-matching patterns + (case-insensitive regular expressions), one per line, followed by the + account name to use when the transaction's description matches any of + these patterns. + +- A match pattern may optionally be followed by = and a replacement + pattern, which will become the ledger transaction's description. + Otherwise the matched part of the csv description is used. (To preserve + the full csv description, use .* before and after the match pattern.) Smart dates ........... @@ -377,3 +398,5 @@ Other differences .. _c++ ledger's manual: http://joyful.com/repos/ledger/doc/ledger.html .. _binaries: http://hledger.org/binaries/ .. _Haskell Platform: http://hackage.haskell.org/platform/ +.. _CSV: http://en.wikipedia.org/wiki/Comma-separated_values +