convert: more robust csv parsing, specify null field positions with -
This commit is contained in:
		
							parent
							
								
									2855fe9e0a
								
							
						
					
					
						commit
						f35184583a
					
				@ -6,16 +6,23 @@ format, and print it on stdout.
 | 
				
			|||||||
Usage: hledger convert CSVFILE ACCOUNTNAME RULESFILE
 | 
					Usage: hledger convert CSVFILE ACCOUNTNAME RULESFILE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ACCOUNTNAME is the base account to use for transactions.  RULESFILE
 | 
					ACCOUNTNAME is the base account to use for transactions.  RULESFILE
 | 
				
			||||||
provides some rules to help convert the data. It should contain paragraphs
 | 
					provides some rules to help convert the data.
 | 
				
			||||||
separated by one blank line.  The first paragraph is a single line of five
 | 
					
 | 
				
			||||||
comma-separated numbers, which are the csv field positions corresponding
 | 
					- It should contain paragraphs separated by one blank line.
 | 
				
			||||||
to the ledger transaction's date, status, code, description, and amount.
 | 
					
 | 
				
			||||||
All other paragraphs specify one or more regular expressions, followed by
 | 
					- The first paragraph is a single line of four comma-separated fields,
 | 
				
			||||||
the ledger account to use when a transaction's description matches any of
 | 
					  which are numbers indicating the (0-based) csv field positions
 | 
				
			||||||
them. A regexp may optionally have a replacement pattern specified after =.
 | 
					  corresponding to the transaction date, code/number, description, and amount.
 | 
				
			||||||
 | 
					  If a field does not exist in the csv, use - in place of its position to get a default value.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- All other paragraphs specify one or more regular expressions, followed
 | 
				
			||||||
 | 
					  by the ledger account to use when a transaction's description matches
 | 
				
			||||||
 | 
					  any of them. A regexp may optionally have a replacement pattern
 | 
				
			||||||
 | 
					  specified after =.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Here's an example rules file:
 | 
					Here's an example rules file:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
> 0,2,3,4,1
 | 
					> 0,-,4,1
 | 
				
			||||||
>
 | 
					>
 | 
				
			||||||
> ATM DEPOSIT
 | 
					> ATM DEPOSIT
 | 
				
			||||||
> assets:bank:checking
 | 
					> assets:bank:checking
 | 
				
			||||||
@ -27,13 +34,6 @@ Here's an example rules file:
 | 
				
			|||||||
> BLKBSTR=BLOCKBUSTER
 | 
					> BLKBSTR=BLOCKBUSTER
 | 
				
			||||||
> expenses:entertainment
 | 
					> expenses:entertainment
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Roadmap: 
 | 
					 | 
				
			||||||
Support for other formats will be added. To update a ledger file, pipe the
 | 
					 | 
				
			||||||
output into the import command. The rules will move to a hledger config
 | 
					 | 
				
			||||||
file. When no rule matches, accounts will be guessed based on similarity
 | 
					 | 
				
			||||||
to descriptions in the current ledger, with interactive prompting and
 | 
					 | 
				
			||||||
optional rule saving.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
-}
 | 
					-}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
module Commands.Convert where
 | 
					module Commands.Convert where
 | 
				
			||||||
@ -50,6 +50,7 @@ import Ledger.Dates (firstJust, showDate)
 | 
				
			|||||||
import Locale (defaultTimeLocale)
 | 
					import Locale (defaultTimeLocale)
 | 
				
			||||||
import Data.Time.Format (parseTime)
 | 
					import Data.Time.Format (parseTime)
 | 
				
			||||||
import Control.Monad (when)
 | 
					import Control.Monad (when)
 | 
				
			||||||
 | 
					import Safe (readMay, readDef)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
convert :: [Opt] -> [String] -> Ledger -> IO ()
 | 
					convert :: [Opt] -> [String] -> Ledger -> IO ()
 | 
				
			||||||
@ -70,11 +71,11 @@ type Rule = (
 | 
				
			|||||||
  ,AccountName              -- account name to use for a matched transaction
 | 
					  ,AccountName              -- account name to use for a matched transaction
 | 
				
			||||||
  )
 | 
					  )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
parseRules :: String -> IO ([Int],[Rule])
 | 
					parseRules :: String -> IO ([Maybe Int],[Rule])
 | 
				
			||||||
parseRules s = do
 | 
					parseRules s = do
 | 
				
			||||||
  let ls = map strip $ lines s
 | 
					  let ls = map strip $ lines s
 | 
				
			||||||
  let paras = splitOn [""] ls
 | 
					  let paras = splitOn [""] ls
 | 
				
			||||||
  let fieldpositions = map read $ splitOn "," $ head $ head paras
 | 
					  let fieldpositions = map readMay $ splitOn "," $ head $ head paras
 | 
				
			||||||
  let rules = [(map parsePatRepl $ init ls, last ls) | ls <- tail paras]
 | 
					  let rules = [(map parsePatRepl $ init ls, last ls) | ls <- tail paras]
 | 
				
			||||||
  return (fieldpositions,rules)
 | 
					  return (fieldpositions,rules)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -83,21 +84,27 @@ parsePatRepl l = case splitOn "=" l of
 | 
				
			|||||||
                   (p:r:_) -> (p, Just r)
 | 
					                   (p:r:_) -> (p, Just r)
 | 
				
			||||||
                   _       -> (l, Nothing)
 | 
					                   _       -> (l, Nothing)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
print_ledger_txn :: Bool -> (String,[Int],[Rule]) -> [String] -> IO ()
 | 
					print_ledger_txn :: Bool -> (String,[Maybe Int],[Rule]) -> [String] -> IO ()
 | 
				
			||||||
print_ledger_txn debug (baseacct,fieldpositions,rules) record@(_:_:_:_:_:[]) = do
 | 
					print_ledger_txn _ (_,[],_) _ = return ()
 | 
				
			||||||
  let [date,_,number,description,amount] = map (record !!) fieldpositions
 | 
					print_ledger_txn _ (('#':_),_,_) _ = return ()
 | 
				
			||||||
 | 
					print_ledger_txn debug (baseacct,fieldpositions,rules) csvrecord
 | 
				
			||||||
 | 
					    | length csvrecord < maximum (map (fromMaybe 0) fieldpositions) + 1 = return ()
 | 
				
			||||||
 | 
					    | otherwise =
 | 
				
			||||||
 | 
					 do
 | 
				
			||||||
 | 
					  when debug $ hPutStrLn stderr $ show csvrecord
 | 
				
			||||||
 | 
					  let date = maybe "" (csvrecord !!) (fieldpositions !! 0)
 | 
				
			||||||
 | 
					      number = maybe "" (csvrecord !!) (fieldpositions !! 1)
 | 
				
			||||||
 | 
					      description = maybe "" (csvrecord !!) (fieldpositions !! 2)
 | 
				
			||||||
 | 
					      amount = maybe "" (csvrecord !!) (fieldpositions !! 3)
 | 
				
			||||||
      amount' = strnegate amount where strnegate ('-':s) = s
 | 
					      amount' = strnegate amount where strnegate ('-':s) = s
 | 
				
			||||||
                                       strnegate s = '-':s
 | 
					                                       strnegate s = '-':s
 | 
				
			||||||
      unknownacct | (read amount' :: Double) < 0 = "income:unknown"
 | 
					      unknownacct | (readDef 0 amount' :: Double) < 0 = "income:unknown"
 | 
				
			||||||
                  | otherwise = "expenses:unknown"
 | 
					                  | otherwise = "expenses:unknown"
 | 
				
			||||||
      (acct,desc) = choose_acct_desc rules (unknownacct,description)
 | 
					      (acct,desc) = choose_acct_desc rules (unknownacct,description)
 | 
				
			||||||
  when (debug) $ hPutStrLn stderr $ printf "using %s for %s" desc description
 | 
					  when (debug) $ hPutStrLn stderr $ printf "using %s for %s" desc description
 | 
				
			||||||
  putStrLn $ printf "%s%s %s" (fixdate date) (if not (null number) then printf " (%s)" number else "") desc
 | 
					  putStrLn $ printf "%s%s %s" (fixdate date) (if not (null number) then printf " (%s)" number else "") desc
 | 
				
			||||||
  putStrLn $ printf "    %-30s  %15s" acct (printf "$%s" amount' :: String)
 | 
					  putStrLn $ printf "    %-30s  %15s" acct (printf "$%s" amount' :: String)
 | 
				
			||||||
  putStrLn $ printf "    %s\n" baseacct
 | 
					  putStrLn $ printf "    %s\n" baseacct
 | 
				
			||||||
print_ledger_txn True _ record =
 | 
					 | 
				
			||||||
  hPutStrLn stderr $ printf "ignoring %s" $ show record
 | 
					 | 
				
			||||||
print_ledger_txn _ _ _ = return ()
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
choose_acct_desc :: [Rule] -> (String,String) -> (String,String)
 | 
					choose_acct_desc :: [Rule] -> (String,String) -> (String,String)
 | 
				
			||||||
choose_acct_desc rules (acct,desc) | null matchingrules = (acct,desc)
 | 
					choose_acct_desc rules (acct,desc) | null matchingrules = (acct,desc)
 | 
				
			||||||
@ -116,9 +123,14 @@ matchregex = matchRegexPR . ("(?i)" ++)
 | 
				
			|||||||
fixdate :: String -> String
 | 
					fixdate :: String -> String
 | 
				
			||||||
fixdate s = maybe "0000/00/00" showDate $ 
 | 
					fixdate s = maybe "0000/00/00" showDate $ 
 | 
				
			||||||
              firstJust
 | 
					              firstJust
 | 
				
			||||||
              [parseTime defaultTimeLocale "%Y/%m/%d" s
 | 
					              [parseTime defaultTimeLocale "%Y/%m/%e" s
 | 
				
			||||||
              ,parseTime defaultTimeLocale "%Y-%m-%d" s
 | 
					               -- can't parse a month without leading 0, try adding one
 | 
				
			||||||
              ,parseTime defaultTimeLocale "%m/%d/%Y" s
 | 
					              ,parseTime defaultTimeLocale "%Y/%m/%e" (take 5 s ++ "0" ++ drop 5 s)
 | 
				
			||||||
              ,parseTime defaultTimeLocale "%m-%d-%Y" s
 | 
					              ,parseTime defaultTimeLocale "%Y-%m-%e" s
 | 
				
			||||||
 | 
					              ,parseTime defaultTimeLocale "%Y-%m-%e" (take 5 s ++ "0" ++ drop 5 s)
 | 
				
			||||||
 | 
					              ,parseTime defaultTimeLocale "%m/%e/%Y" s
 | 
				
			||||||
 | 
					              ,parseTime defaultTimeLocale "%m/%e/%Y" ('0':s)
 | 
				
			||||||
 | 
					              ,parseTime defaultTimeLocale "%m-%e-%Y" s
 | 
				
			||||||
 | 
					              ,parseTime defaultTimeLocale "%m-%e-%Y" ('0':s)
 | 
				
			||||||
              ]
 | 
					              ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user