convert: new rules file format, more docs

This commit is contained in:
Simon Michael 2009-12-03 20:54:20 +00:00
parent 43e327d05b
commit 2607082e9e
3 changed files with 285 additions and 95 deletions

View File

@ -1,98 +1,249 @@
{-| {-|
Convert account data in CSV format (eg downloaded from a bank) to ledger Convert account data in CSV format (eg downloaded from a bank) to ledger
format, and print it on stdout. See the manual for more details. format, and print it on stdout. See the manual for more details.
-} -}
module Commands.Convert where module Commands.Convert where
import Data.List.Split (splitOn) import Options (Opt(Debug))
import Options -- (Opt,Debug) import Ledger.Types (Ledger,AccountName,LedgerTransaction(..),Posting(..),PostingType(..))
import Ledger.Types (Ledger,AccountName) import Ledger.Utils (strip, spacenonewline, restofline)
import Ledger.Utils (strip) import Ledger.Parse (someamount, emptyCtx, ledgeraccountname)
import System.IO (stderr, hPutStr) import Ledger.Amount (nullmixedamt)
import System.IO (stderr)
import Text.CSV (parseCSVFromFile, printCSV) import Text.CSV (parseCSVFromFile, printCSV)
import Text.Printf (printf) import Text.Printf (hPrintf)
import Text.RegexPR (matchRegexPR) import Text.RegexPR (matchRegexPR)
import Data.Maybe import Data.Maybe
import Ledger.Dates (firstJust, showDate) import Ledger.Dates (firstJust, showDate, parsedate)
import Locale (defaultTimeLocale) import Locale (defaultTimeLocale)
import Data.Time.Format (parseTime) import Data.Time.Format (parseTime)
import Control.Monad (when) import Control.Monad (when, guard)
import Safe (readMay, readDef) import Safe (readDef, readMay)
import System.FilePath.Posix (takeBaseName)
import Text.ParserCombinators.Parsec
convert :: [Opt] -> [String] -> Ledger -> IO () convert :: [Opt] -> [String] -> Ledger -> IO ()
convert opts args _ = do convert opts args _ = do
when (length args /= 3) (error "please specify a csv file, base account, and import rules file.") when (length args /= 2) (error "please specify a csv data file and conversion rules file.")
let [csvfile,baseacct,rulesfile] = args let debug = Debug `elem` opts
rulesstr <- readFile rulesfile [csvfile,rulesfile] = args
(fieldpositions,rules) <- parseRules rulesstr csvparse <- parseCSVFromFile csvfile
parse <- parseCSVFromFile csvfile let records = case csvparse of
let records = case parse of
Left e -> error $ show e Left e -> error $ show e
Right rs -> reverse rs Right rs -> reverse $ filter (/= [""]) rs
mapM_ (print_ledger_txn (Debug `elem` opts) (baseacct,fieldpositions,rules)) records rulesstr <- readFile rulesfile
let rules = case parseCsvRules (takeBaseName csvfile) rulesstr of
Left e -> error $ show e
Right r -> r
when debug $ hPrintf stderr "using csv conversion rules file %s\n" rulesfile
when debug $ hPrintf stderr "%s\n" (show rules)
mapM_ (printTxn debug rules) records
{- |
A set of data definitions and account-matching patterns sufficient to
convert a particular CSV data file into meaningful ledger transactions. See above.
-}
data CsvRules = CsvRules {
dateField :: Maybe FieldPosition,
statusField :: Maybe FieldPosition,
codeField :: Maybe FieldPosition,
descriptionField :: Maybe FieldPosition,
amountField :: Maybe FieldPosition,
currencyField :: Maybe FieldPosition,
baseCurrency :: Maybe String,
baseAccount :: AccountName,
accountRules :: [AccountRule]
} deriving (Show)
type Rule = ( nullrules = CsvRules {
[(String, Maybe String)] -- list of patterns and optional replacements dateField=Nothing,
,AccountName -- account name to use for a matched transaction statusField=Nothing,
codeField=Nothing,
descriptionField=Nothing,
amountField=Nothing,
currencyField=Nothing,
baseCurrency=Nothing,
baseAccount="unknown",
accountRules=[]
}
type FieldPosition = Int
type AccountRule = (
[(String, Maybe String)] -- list of regex match patterns with optional replacements
,AccountName -- account name to use for a transaction matching this rule
) )
parseRules :: String -> IO ([Maybe Int],[Rule]) type CsvRecord = [String]
parseRules s = do
let ls = map strip $ lines s
let paras = splitOn [""] ls
let fieldpositions = map readMay $ splitOn "," $ head $ head paras
let rules = [(map parsePatRepl $ init ls, last ls) | ls <- tail paras]
return (fieldpositions,rules)
parsePatRepl :: String -> (String, Maybe String) -- rules file parser
parsePatRepl l = case splitOn "=" l of
(p:r:_) -> (p, Just r)
_ -> (l, Nothing)
print_ledger_txn :: Bool -> (String,[Maybe Int],[Rule]) -> [String] -> IO () parseCsvRules :: String -> String -> Either ParseError CsvRules
print_ledger_txn _ (_,[],_) _ = return () parseCsvRules basefilename s = runParser csvrulesP nullrules{baseAccount=basefilename} "" s
print_ledger_txn _ (('#':_),_,_) _ = return ()
print_ledger_txn debug (baseacct,fieldpositions,rules) csvrecord csvrulesP :: GenParser Char CsvRules CsvRules
| length csvrecord < maximum (map (fromMaybe 0) fieldpositions) + 1 = return () csvrulesP = do
| otherwise = optional blanklines
do many definitions
when debug $ hPutStr stderr $ printCSV [csvrecord] r <- getState
let date = maybe "" (csvrecord !!) (fieldpositions !! 0) ars <- many accountrule
number = maybe "" (csvrecord !!) (fieldpositions !! 1) optional blanklines
description = maybe "" (csvrecord !!) (fieldpositions !! 2) eof
amount = maybe "" (csvrecord !!) (fieldpositions !! 3) return r{accountRules=ars}
amount' = strnegate amount where strnegate ('-':s) = s
strnegate s = '-':s -- | Real independent parser choice, even when alternative matches share a prefix.
unknownacct | (readDef 0 amount' :: Double) < 0 = "income:unknown" choice' parsers = choice $ map try (init parsers) ++ [last parsers]
definitions :: GenParser Char CsvRules ()
definitions = do
choice' [
datefield
,statusfield
,codefield
,descriptionfield
,amountfield
,currencyfield
,basecurrency
,baseaccount
] <?> "definition"
return ()
datefield = do
string "date-field"
many1 spacenonewline
v <- restofline
r <- getState
setState r{dateField=readMay v}
codefield = do
string "code-field"
many1 spacenonewline
v <- restofline
r <- getState
setState r{codeField=readMay v}
statusfield = do
string "status-field"
many1 spacenonewline
v <- restofline
r <- getState
setState r{statusField=readMay v}
descriptionfield = do
string "description-field"
many1 spacenonewline
v <- restofline
r <- getState
setState r{descriptionField=readMay v}
amountfield = do
string "amount-field"
many1 spacenonewline
v <- restofline
r <- getState
setState r{amountField=readMay v}
currencyfield = do
string "currency-field"
many1 spacenonewline
v <- restofline
r <- getState
setState r{currencyField=readMay v}
basecurrency = do
string "currency"
many1 spacenonewline
v <- restofline
r <- getState
setState r{baseCurrency=Just v}
baseaccount = do
string "base-account"
many1 spacenonewline
v <- ledgeraccountname
optional newline
r <- getState
setState r{baseAccount=v}
accountrule :: GenParser Char CsvRules AccountRule
accountrule = do
blanklines
pats <- many1 matchreplacepattern
guard $ length pats >= 2
let pats' = init pats
acct = either (fail.show) id $ runParser ledgeraccountname () "" $ fst $ last pats
return (pats',acct)
blanklines = many1 blankline >> return ()
blankline = many spacenonewline >> newline >> return () <?> "blank line"
matchreplacepattern = do
matchpat <- many1 (noneOf "=\n")
replpat <- optionMaybe $ do {char '='; many $ noneOf "\n"}
newline
return (matchpat,replpat)
printTxn :: Bool -> CsvRules -> CsvRecord -> IO ()
printTxn debug rules rec = do
when debug $ hPrintf stderr "csv: %s" (printCSV [rec])
putStr $ show $ transactionFromCsvRecord rules rec
-- csv record conversion
transactionFromCsvRecord :: CsvRules -> CsvRecord -> LedgerTransaction
transactionFromCsvRecord rules fields =
let
date = parsedate $ normaliseDate $ maybe "1900/1/1" (fields !!) (dateField rules)
status = maybe False (null . strip . (fields !!)) (statusField rules)
code = maybe "" (fields !!) (codeField rules)
desc = maybe "" (fields !!) (descriptionField rules)
comment = ""
precomment = ""
amountstr = maybe "" (fields !!) (amountField rules)
amountstr' = strnegate amountstr where strnegate ('-':s) = s
strnegate s = '-':s
currency = maybe (fromMaybe "" $ baseCurrency rules) (fields !!) (currencyField rules)
amountstr'' = currency ++ amountstr'
amountparse = runParser someamount emptyCtx "" amountstr''
amount = either (const nullmixedamt) id amountparse
unknownacct | (readDef 0 amountstr' :: Double) < 0 = "income:unknown"
| otherwise = "expenses:unknown" | otherwise = "expenses:unknown"
(acct,desc) = choose_acct_desc rules (unknownacct,description) (acct,newdesc) = identify (accountRules rules) unknownacct desc
printf "%s%s %s\n" (fixdate date) (if not (null number) then printf " (%s)" number else "") desc in
printf " %-30s %15s\n" acct (printf "$%s" amount' :: String) LedgerTransaction {
printf " %s\n\n" baseacct ltdate=date,
lteffectivedate=Nothing,
ltstatus=status,
ltcode=code,
ltdescription=newdesc,
ltcomment=comment,
ltpreceding_comment_lines=precomment,
ltpostings=[
Posting {
pstatus=False,
paccount=acct,
pamount=amount,
pcomment="",
ptype=RegularPosting
},
Posting {
pstatus=False,
paccount=baseAccount rules,
pamount=(-amount),
pcomment="",
ptype=RegularPosting
}
]
}
choose_acct_desc :: [Rule] -> (String,String) -> (String,String) -- | Convert some date string with unknown format to YYYY/MM/DD.
choose_acct_desc rules (acct,desc) | null matchingrules = (acct,desc) normaliseDate :: String -> String
| otherwise = (a,d) normaliseDate s = maybe "0000/00/00" showDate $
where
matchingrules = filter ismatch rules :: [Rule]
where ismatch = any (isJust . flip matchregex desc . fst) . fst
(prs,a) = head matchingrules
mrs = filter (isJust . fst) $ map (\(p,r) -> (matchregex p desc, r)) prs
(m,repl) = head mrs
matched = fst $ fst $ fromJust m
d = fromMaybe matched repl
matchregex = matchRegexPR . ("(?i)" ++)
fixdate :: String -> String
fixdate s = maybe "0000/00/00" showDate $
firstJust firstJust
[parseTime defaultTimeLocale "%Y/%m/%e" s [parseTime defaultTimeLocale "%Y/%m/%e" s
-- can't parse a month without leading 0, try adding onee -- can't parse a month without leading 0, try adding one
,parseTime defaultTimeLocale "%Y/%m/%e" (take 5 s ++ "0" ++ drop 5 s) ,parseTime defaultTimeLocale "%Y/%m/%e" (take 5 s ++ "0" ++ drop 5 s)
,parseTime defaultTimeLocale "%Y-%m-%e" s ,parseTime defaultTimeLocale "%Y-%m-%e" s
,parseTime defaultTimeLocale "%Y-%m-%e" (take 5 s ++ "0" ++ drop 5 s) ,parseTime defaultTimeLocale "%Y-%m-%e" (take 5 s ++ "0" ++ drop 5 s)
@ -102,3 +253,19 @@ fixdate s = maybe "0000/00/00" showDate $
,parseTime defaultTimeLocale "%m-%e-%Y" ('0':s) ,parseTime defaultTimeLocale "%m-%e-%Y" ('0':s)
] ]
-- | Apply account matching rules to a transaction description to obtain
-- the most appropriate account and a new description.
identify :: [AccountRule] -> String -> String -> (String,String)
identify rules defacct desc | null matchingrules = (defacct,desc)
| otherwise = (acct,newdesc)
where
matchingrules = filter ismatch rules :: [AccountRule]
where ismatch = any (isJust . flip matchregex desc . fst) . fst
(prs,acct) = head matchingrules
mrs = filter (isJust . fst) $ map (\(p,r) -> (matchregex p desc, r)) prs
(m,repl) = head mrs
matched = fst $ fst $ fromJust m
newdesc = fromMaybe matched repl
matchregex = matchRegexPR . ("(?i)" ++)

View File

@ -20,7 +20,7 @@ timeprogname = "hours"
usagehdr = usagehdr =
"Usage: hledger [OPTIONS] [COMMAND [PATTERNS]]\n" ++ "Usage: hledger [OPTIONS] [COMMAND [PATTERNS]]\n" ++
" hours [OPTIONS] [COMMAND [PATTERNS]]\n" ++ " hours [OPTIONS] [COMMAND [PATTERNS]]\n" ++
" hledger convert CSVFILE ACCOUNTNAME RULESFILE\n" ++ " hledger convert CSVFILE RULESFILE\n" ++
"\n" ++ "\n" ++
"hledger uses your ~/.ledger or $LEDGER file (or another specified with -f),\n" ++ "hledger uses your ~/.ledger or $LEDGER file (or another specified with -f),\n" ++
"while hours uses your ~/.timelog or $TIMELOG file.\n" ++ "while hours uses your ~/.timelog or $TIMELOG file.\n" ++

73
README
View File

@ -157,26 +157,35 @@ Commands
Convert Convert
,,,,,,, ,,,,,,,
The convert command converts CSV (comma-separated value) files downloaded The convert command reads a CSV_ file you have downloaded from your bank,
from a bank into ledger format. Doing a bulk import in this way can be a and prints out the transactions in ledger format, suitable for adding to
easier than entering each transaction by hand. The downside is you no your ledger. This can be a lot quicker than entering every transaction by
longer have your own data with which to detect errors by the bank. hand. (The downside is that you are less likely to notice if your bank makes an
error, so keep an eye on them!)
Usage is different from the other commands, typically it looks like this:: Use it like this::
hledger convert FILE.csv BASEACCOUNT FILE.rules >FILE.ledger hledger convert FILE.csv FILE.rules >FILE.ledger
(review FILE.ledger, then copy and paste into main ledger)
Ie convert the csv data and save the output into a similarly named ledger file. This will convert the csv data in FILE.csv using the conversion hints in
(It's not required, but convenient to use the same base filename for the FILE.rules, and save the output into a temporary ledger file. Then you
csv, rules and output files as above.) should review FILE.ledger for problems; update the rules and convert again
if needed; and finally copy/paste transactions which are new into your
main ledger.
BASEACCOUNT is the source account for these transactions, eg rules file
``assets:bank:checking``. ''''''''''
FILE.rules contains some rules to help convert the data. Here's an example:: A .rules file contains some data definitions and some rules for assigning
destination accounts to transactions. Typically you will have one csv file
and corresponding rules file per bank account. Here's an example rules
file for converting the csv download from a Wells Fargo checking account::
0,-,4,1 base-account assets:bank:checking
date-field 0
description-field 4
amount-field 1
currency $
SPECTRUM SPECTRUM
expenses:health:gym expenses:health:gym
@ -188,21 +197,33 @@ FILE.rules contains some rules to help convert the data. Here's an example::
(TO|FROM) SAVINGS (TO|FROM) SAVINGS
assets:bank:savings assets:bank:savings
It must contain: This says:
- paragraphs separated by one blank line. - the ledger account corresponding to this csv file is assets:bank:checking
- the first csv field is the date, the second is the amount, the fifth is the description
- prepend a dollar sign to the amount field
- if description contains SPECTRUM (case-insensitive), the transaction is a gym expense
- if description contains ITUNES or BLKBSTR, the transaction is an entertainment expense; also rewrite BLKBSTR as BLOCKBUSTER
- if description contains TO SAVINGS or FROM SAVINGS, the transaction is a savings transfer
- The first paragraph is a single line of four comma-separated fields, Notes:
which are numbers indicating the (0-based) csv field positions
corresponding to the transaction date, code/number, description, and amount.
If a field does not exist in the csv, use - to specify a default value.
- Other paragraphs consist of one or more regular expression patterns, one - Definitions must come first, one per line, all in one paragraph. Each
per line, followed by a line specifying the account to use when a is a name and a value separated by whitespace. Supported names are:
transaction's description matches any of these patterns. Patterns may base-account, date-field, status-field, code-field, description-field,
optionally have a replacement pattern specified after =, otherwise the amount-field, currency-field, currency. All are optional and will
matching part is used. use defaults if not specified.
- The remainder of the file is account-assigning rules. Each is a
paragraph consisting of one or more description-matching patterns
(case-insensitive regular expressions), one per line, followed by the
account name to use when the transaction's description matches any of
these patterns.
- A match pattern may optionally be followed by = and a replacement
pattern, which will become the ledger transaction's description.
Otherwise the matched part of the csv description is used. (To preserve
the full csv description, use .* before and after the match pattern.)
Smart dates Smart dates
........... ...........
@ -377,3 +398,5 @@ Other differences
.. _c++ ledger's manual: http://joyful.com/repos/ledger/doc/ledger.html .. _c++ ledger's manual: http://joyful.com/repos/ledger/doc/ledger.html
.. _binaries: http://hledger.org/binaries/ .. _binaries: http://hledger.org/binaries/
.. _Haskell Platform: http://hackage.haskell.org/platform/ .. _Haskell Platform: http://hackage.haskell.org/platform/
.. _CSV: http://en.wikipedia.org/wiki/Comma-separated_values