convert: more robust csv parsing, specify null field positions with -

This commit is contained in:
Simon Michael 2009-11-18 07:29:44 +00:00
parent 2855fe9e0a
commit f35184583a

View File

@ -6,16 +6,23 @@ format, and print it on stdout.
Usage: hledger convert CSVFILE ACCOUNTNAME RULESFILE Usage: hledger convert CSVFILE ACCOUNTNAME RULESFILE
ACCOUNTNAME is the base account to use for transactions. RULESFILE ACCOUNTNAME is the base account to use for transactions. RULESFILE
provides some rules to help convert the data. It should contain paragraphs provides some rules to help convert the data.
separated by one blank line. The first paragraph is a single line of five
comma-separated numbers, which are the csv field positions corresponding - It should contain paragraphs separated by one blank line.
to the ledger transaction's date, status, code, description, and amount.
All other paragraphs specify one or more regular expressions, followed by - The first paragraph is a single line of four comma-separated fields,
the ledger account to use when a transaction's description matches any of which are numbers indicating the (0-based) csv field positions
them. A regexp may optionally have a replacement pattern specified after =. corresponding to the transaction date, code/number, description, and amount.
If a field does not exist in the csv, use - to specify a default value.
- All other paragraphs specify one or more regular expressions, followed
by the ledger account to use when a transaction's description matches
any of them. A regexp may optionally have a replacement pattern
specified after =.
Here's an example rules file: Here's an example rules file:
> 0,2,3,4,1 > 0,-,4,1
> >
> ATM DEPOSIT > ATM DEPOSIT
> assets:bank:checking > assets:bank:checking
@ -27,13 +34,6 @@ Here's an example rules file:
> BLKBSTR=BLOCKBUSTER > BLKBSTR=BLOCKBUSTER
> expenses:entertainment > expenses:entertainment
Roadmap:
Support for other formats will be added. To update a ledger file, pipe the
output into the import command. The rules will move to a hledger config
file. When no rule matches, accounts will be guessed based on similarity
to descriptions in the current ledger, with interactive prompting and
optional rule saving.
-} -}
module Commands.Convert where module Commands.Convert where
@ -50,6 +50,7 @@ import Ledger.Dates (firstJust, showDate)
import Locale (defaultTimeLocale) import Locale (defaultTimeLocale)
import Data.Time.Format (parseTime) import Data.Time.Format (parseTime)
import Control.Monad (when) import Control.Monad (when)
import Safe (readMay, readDef)
convert :: [Opt] -> [String] -> Ledger -> IO () convert :: [Opt] -> [String] -> Ledger -> IO ()
@ -70,11 +71,11 @@ type Rule = (
,AccountName -- account name to use for a matched transaction ,AccountName -- account name to use for a matched transaction
) )
parseRules :: String -> IO ([Int],[Rule]) parseRules :: String -> IO ([Maybe Int],[Rule])
parseRules s = do parseRules s = do
let ls = map strip $ lines s let ls = map strip $ lines s
let paras = splitOn [""] ls let paras = splitOn [""] ls
let fieldpositions = map read $ splitOn "," $ head $ head paras let fieldpositions = map readMay $ splitOn "," $ head $ head paras
let rules = [(map parsePatRepl $ init ls, last ls) | ls <- tail paras] let rules = [(map parsePatRepl $ init ls, last ls) | ls <- tail paras]
return (fieldpositions,rules) return (fieldpositions,rules)
@ -83,21 +84,27 @@ parsePatRepl l = case splitOn "=" l of
(p:r:_) -> (p, Just r) (p:r:_) -> (p, Just r)
_ -> (l, Nothing) _ -> (l, Nothing)
print_ledger_txn :: Bool -> (String,[Int],[Rule]) -> [String] -> IO () print_ledger_txn :: Bool -> (String,[Maybe Int],[Rule]) -> [String] -> IO ()
print_ledger_txn debug (baseacct,fieldpositions,rules) record@(_:_:_:_:_:[]) = do print_ledger_txn _ (_,[],_) _ = return ()
let [date,_,number,description,amount] = map (record !!) fieldpositions print_ledger_txn _ (('#':_),_,_) _ = return ()
print_ledger_txn debug (baseacct,fieldpositions,rules) csvrecord
| length csvrecord < maximum (map (fromMaybe 0) fieldpositions) + 1 = return ()
| otherwise =
do
when debug $ hPutStrLn stderr $ show csvrecord
let date = maybe "" (csvrecord !!) (fieldpositions !! 0)
number = maybe "" (csvrecord !!) (fieldpositions !! 1)
description = maybe "" (csvrecord !!) (fieldpositions !! 2)
amount = maybe "" (csvrecord !!) (fieldpositions !! 3)
amount' = strnegate amount where strnegate ('-':s) = s amount' = strnegate amount where strnegate ('-':s) = s
strnegate s = '-':s strnegate s = '-':s
unknownacct | (read amount' :: Double) < 0 = "income:unknown" unknownacct | (readDef 0 amount' :: Double) < 0 = "income:unknown"
| otherwise = "expenses:unknown" | otherwise = "expenses:unknown"
(acct,desc) = choose_acct_desc rules (unknownacct,description) (acct,desc) = choose_acct_desc rules (unknownacct,description)
when (debug) $ hPutStrLn stderr $ printf "using %s for %s" desc description when (debug) $ hPutStrLn stderr $ printf "using %s for %s" desc description
putStrLn $ printf "%s%s %s" (fixdate date) (if not (null number) then printf " (%s)" number else "") desc putStrLn $ printf "%s%s %s" (fixdate date) (if not (null number) then printf " (%s)" number else "") desc
putStrLn $ printf " %-30s %15s" acct (printf "$%s" amount' :: String) putStrLn $ printf " %-30s %15s" acct (printf "$%s" amount' :: String)
putStrLn $ printf " %s\n" baseacct putStrLn $ printf " %s\n" baseacct
print_ledger_txn True _ record =
hPutStrLn stderr $ printf "ignoring %s" $ show record
print_ledger_txn _ _ _ = return ()
choose_acct_desc :: [Rule] -> (String,String) -> (String,String) choose_acct_desc :: [Rule] -> (String,String) -> (String,String)
choose_acct_desc rules (acct,desc) | null matchingrules = (acct,desc) choose_acct_desc rules (acct,desc) | null matchingrules = (acct,desc)
@ -116,9 +123,14 @@ matchregex = matchRegexPR . ("(?i)" ++)
fixdate :: String -> String fixdate :: String -> String
fixdate s = maybe "0000/00/00" showDate $ fixdate s = maybe "0000/00/00" showDate $
firstJust firstJust
[parseTime defaultTimeLocale "%Y/%m/%d" s [parseTime defaultTimeLocale "%Y/%m/%e" s
,parseTime defaultTimeLocale "%Y-%m-%d" s -- can't parse a month without leading 0, try adding onee
,parseTime defaultTimeLocale "%m/%d/%Y" s ,parseTime defaultTimeLocale "%Y/%m/%e" (take 5 s ++ "0" ++ drop 5 s)
,parseTime defaultTimeLocale "%m-%d-%Y" s ,parseTime defaultTimeLocale "%Y-%m-%e" s
,parseTime defaultTimeLocale "%Y-%m-%e" (take 5 s ++ "0" ++ drop 5 s)
,parseTime defaultTimeLocale "%m/%e/%Y" s
,parseTime defaultTimeLocale "%m/%e/%Y" ('0':s)
,parseTime defaultTimeLocale "%m-%e-%Y" s
,parseTime defaultTimeLocale "%m-%e-%Y" ('0':s)
] ]