387 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Haskell
		
	
	
	
	
	
			
		
		
	
	
			387 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Haskell
		
	
	
	
	
	
{-|
 | 
						|
Convert account data in CSV format (eg downloaded from a bank) to journal
 | 
						|
format, and print it on stdout. See the manual for more details.
 | 
						|
-}
 | 
						|
 | 
						|
module Hledger.Cli.Commands.Convert where
 | 
						|
import Hledger.Cli.Options (Opt(Debug))
 | 
						|
import Hledger.Cli.Version (versionstr)
 | 
						|
import Hledger.Data.Types (Journal,AccountName,Transaction(..),Posting(..),PostingType(..))
 | 
						|
import Hledger.Data.Utils (strip, spacenonewline, restofline, parseWithCtx, assertParse, assertParseEqual)
 | 
						|
import Hledger.Read.Common (emptyCtx)
 | 
						|
import Hledger.Read.Journal (someamount,ledgeraccountname)
 | 
						|
import Hledger.Data.Amount (nullmixedamt)
 | 
						|
import Safe (atDef, maximumDef)
 | 
						|
import System.IO (stderr)
 | 
						|
import Text.CSV (parseCSVFromFile, printCSV)
 | 
						|
import Text.Printf (hPrintf)
 | 
						|
import Text.RegexPR (matchRegexPR, gsubRegexPR)
 | 
						|
import Data.Maybe
 | 
						|
import Hledger.Data.Dates (firstJust, showDate, parsedate)
 | 
						|
import System.Locale (defaultTimeLocale)
 | 
						|
import Data.Time.Format (parseTime)
 | 
						|
import Control.Monad (when, guard, liftM)
 | 
						|
import Safe (readDef, readMay)
 | 
						|
import System.Directory (doesFileExist)
 | 
						|
import System.Exit (exitFailure)
 | 
						|
import System.FilePath (takeBaseName, replaceExtension)
 | 
						|
import Text.ParserCombinators.Parsec
 | 
						|
import Test.HUnit
 | 
						|
 | 
						|
 | 
						|
{- |
 | 
						|
A set of data definitions and account-matching patterns sufficient to
 | 
						|
convert a particular CSV data file into meaningful journal transactions. See above.
 | 
						|
-}
 | 
						|
data CsvRules = CsvRules {
 | 
						|
      dateField :: Maybe FieldPosition,
 | 
						|
      statusField :: Maybe FieldPosition,
 | 
						|
      codeField :: Maybe FieldPosition,
 | 
						|
      descriptionField :: Maybe FieldPosition,
 | 
						|
      amountField :: Maybe FieldPosition,
 | 
						|
      currencyField :: Maybe FieldPosition,
 | 
						|
      baseCurrency :: Maybe String,
 | 
						|
      baseAccount :: AccountName,
 | 
						|
      accountRules :: [AccountRule]
 | 
						|
} deriving (Show, Eq)
 | 
						|
 | 
						|
nullrules = CsvRules {
 | 
						|
      dateField=Nothing,
 | 
						|
      statusField=Nothing,
 | 
						|
      codeField=Nothing,
 | 
						|
      descriptionField=Nothing,
 | 
						|
      amountField=Nothing,
 | 
						|
      currencyField=Nothing,
 | 
						|
      baseCurrency=Nothing,
 | 
						|
      baseAccount="unknown",
 | 
						|
      accountRules=[]
 | 
						|
}
 | 
						|
 | 
						|
type FieldPosition = Int
 | 
						|
 | 
						|
type AccountRule = (
 | 
						|
   [(String, Maybe String)] -- list of regex match patterns with optional replacements
 | 
						|
  ,AccountName              -- account name to use for a transaction matching this rule
 | 
						|
  )
 | 
						|
 | 
						|
type CsvRecord = [String]
 | 
						|
 | 
						|
 | 
						|
-- | Read the CSV file named as an argument and print equivalent journal transactions,
 | 
						|
-- using/creating a .rules file.
 | 
						|
convert :: [Opt] -> [String] -> Journal -> IO ()
 | 
						|
convert opts args _ = do
 | 
						|
  when (null args) $ error "please specify a csv data file."
 | 
						|
  let csvfile = head args
 | 
						|
  csvparse <- parseCSVFromFile csvfile
 | 
						|
  let records = case csvparse of
 | 
						|
                  Left e -> error $ show e
 | 
						|
                  Right rs -> reverse $ filter (/= [""]) rs
 | 
						|
  let debug = Debug `elem` opts
 | 
						|
      rulesfile = rulesFileFor csvfile
 | 
						|
  exists <- doesFileExist rulesfile
 | 
						|
  if (not exists) then do
 | 
						|
                  hPrintf stderr "creating conversion rules file %s, edit this file for better results\n" rulesfile
 | 
						|
                  writeFile rulesfile initialRulesFileContent
 | 
						|
   else
 | 
						|
      hPrintf stderr "using conversion rules file %s\n" rulesfile
 | 
						|
  rules <- liftM (either (error.show) id) $ parseCsvRulesFile rulesfile
 | 
						|
  when debug $ hPrintf stderr "rules: %s\n" (show rules)
 | 
						|
  let requiredfields = max 2 (maxFieldIndex rules + 1)
 | 
						|
      badrecords = take 1 $ filter ((< requiredfields).length) records
 | 
						|
  if null badrecords
 | 
						|
   then mapM_ (printTxn debug rules) records
 | 
						|
   else do
 | 
						|
     hPrintf stderr (unlines [
 | 
						|
                      "Warning, at least one CSV record does not contain a field referenced by the"
 | 
						|
                     ,"conversion rules file, or has less than two fields. Are you converting a"
 | 
						|
                     ,"valid CSV file ? First bad record:\n%s"
 | 
						|
                     ]) (show $ head badrecords)
 | 
						|
     exitFailure
 | 
						|
 | 
						|
-- | The highest (0-based) field index referenced in the field
 | 
						|
-- definitions, or -1 if no fields are defined.
 | 
						|
maxFieldIndex :: CsvRules -> Int
 | 
						|
maxFieldIndex r = maximumDef (-1) $ catMaybes [
 | 
						|
                   dateField r
 | 
						|
                  ,statusField r
 | 
						|
                  ,codeField r
 | 
						|
                  ,descriptionField r
 | 
						|
                  ,amountField r
 | 
						|
                  ,currencyField r
 | 
						|
                  ]
 | 
						|
 | 
						|
rulesFileFor :: FilePath -> FilePath
 | 
						|
rulesFileFor csvfile = replaceExtension csvfile ".rules"
 | 
						|
 | 
						|
initialRulesFileContent :: String
 | 
						|
initialRulesFileContent =
 | 
						|
    "# csv conversion rules file generated by hledger "++versionstr++"\n" ++
 | 
						|
    "# Add rules to this file for more accurate conversion, see\n"++
 | 
						|
    "# http://hledger.org/MANUAL.html#convert\n" ++
 | 
						|
    "\n" ++
 | 
						|
    "base-account assets:bank:checking\n" ++
 | 
						|
    "date-field 0\n" ++
 | 
						|
    "description-field 4\n" ++
 | 
						|
    "amount-field 1\n" ++
 | 
						|
    "currency $\n" ++
 | 
						|
    "\n" ++
 | 
						|
    "# account-assigning rules\n" ++
 | 
						|
    "\n" ++
 | 
						|
    "SPECTRUM\n" ++
 | 
						|
    "expenses:health:gym\n" ++
 | 
						|
    "\n" ++
 | 
						|
    "ITUNES\n" ++
 | 
						|
    "BLKBSTR=BLOCKBUSTER\n" ++
 | 
						|
    "expenses:entertainment\n" ++
 | 
						|
    "\n" ++
 | 
						|
    "(TO|FROM) SAVINGS\n" ++
 | 
						|
    "assets:bank:savings\n"
 | 
						|
 | 
						|
-- rules file parser
 | 
						|
 | 
						|
parseCsvRulesFile :: FilePath -> IO (Either ParseError CsvRules)
 | 
						|
parseCsvRulesFile f = do
 | 
						|
  s <- readFile f
 | 
						|
  return $ parseCsvRules f s
 | 
						|
 | 
						|
parseCsvRules :: FilePath -> String -> Either ParseError CsvRules
 | 
						|
parseCsvRules rulesfile s = runParser csvrulesfile nullrules{baseAccount=takeBaseName rulesfile} rulesfile s
 | 
						|
 | 
						|
csvrulesfile :: GenParser Char CsvRules CsvRules
 | 
						|
csvrulesfile = do
 | 
						|
  many blankorcommentline
 | 
						|
  many definitions
 | 
						|
  r <- getState
 | 
						|
  ars <- many accountrule
 | 
						|
  many blankorcommentline
 | 
						|
  eof
 | 
						|
  return r{accountRules=ars}
 | 
						|
 | 
						|
-- | Real independent parser choice, even when alternative matches share a prefix.
 | 
						|
choice' parsers = choice $ map try (init parsers) ++ [last parsers]
 | 
						|
 | 
						|
definitions :: GenParser Char CsvRules ()
 | 
						|
definitions = do
 | 
						|
  choice' [
 | 
						|
    datefield
 | 
						|
   ,statusfield
 | 
						|
   ,codefield
 | 
						|
   ,descriptionfield
 | 
						|
   ,amountfield
 | 
						|
   ,currencyfield
 | 
						|
   ,basecurrency
 | 
						|
   ,baseaccount
 | 
						|
   ,commentline
 | 
						|
   ] <?> "definition"
 | 
						|
  return ()
 | 
						|
 | 
						|
datefield = do
 | 
						|
  string "date-field"
 | 
						|
  many1 spacenonewline
 | 
						|
  v <- restofline
 | 
						|
  r <- getState
 | 
						|
  setState r{dateField=readMay v}
 | 
						|
 | 
						|
codefield = do
 | 
						|
  string "code-field"
 | 
						|
  many1 spacenonewline
 | 
						|
  v <- restofline
 | 
						|
  r <- getState
 | 
						|
  setState r{codeField=readMay v}
 | 
						|
 | 
						|
statusfield = do
 | 
						|
  string "status-field"
 | 
						|
  many1 spacenonewline
 | 
						|
  v <- restofline
 | 
						|
  r <- getState
 | 
						|
  setState r{statusField=readMay v}
 | 
						|
 | 
						|
descriptionfield = do
 | 
						|
  string "description-field"
 | 
						|
  many1 spacenonewline
 | 
						|
  v <- restofline
 | 
						|
  r <- getState
 | 
						|
  setState r{descriptionField=readMay v}
 | 
						|
 | 
						|
amountfield = do
 | 
						|
  string "amount-field"
 | 
						|
  many1 spacenonewline
 | 
						|
  v <- restofline
 | 
						|
  r <- getState
 | 
						|
  setState r{amountField=readMay v}
 | 
						|
 | 
						|
currencyfield = do
 | 
						|
  string "currency-field"
 | 
						|
  many1 spacenonewline
 | 
						|
  v <- restofline
 | 
						|
  r <- getState
 | 
						|
  setState r{currencyField=readMay v}
 | 
						|
 | 
						|
basecurrency = do
 | 
						|
  string "currency"
 | 
						|
  many1 spacenonewline
 | 
						|
  v <- restofline
 | 
						|
  r <- getState
 | 
						|
  setState r{baseCurrency=Just v}
 | 
						|
 | 
						|
baseaccount = do
 | 
						|
  string "base-account"
 | 
						|
  many1 spacenonewline
 | 
						|
  v <- ledgeraccountname
 | 
						|
  optional newline
 | 
						|
  r <- getState
 | 
						|
  setState r{baseAccount=v}
 | 
						|
 | 
						|
accountrule :: GenParser Char CsvRules AccountRule
 | 
						|
accountrule = do
 | 
						|
  many blankorcommentline
 | 
						|
  pats <- many1 matchreplacepattern
 | 
						|
  guard $ length pats >= 2
 | 
						|
  let pats' = init pats
 | 
						|
      acct = either (fail.show) id $ runParser ledgeraccountname () "" $ fst $ last pats
 | 
						|
  many blankorcommentline
 | 
						|
  return (pats',acct)
 | 
						|
 <?> "account rule"
 | 
						|
 | 
						|
blanklines = many1 blankline >> return ()
 | 
						|
 | 
						|
blankline = many spacenonewline >> newline >> return () <?> "blank line"
 | 
						|
 | 
						|
commentchar = oneOf ";#"
 | 
						|
 | 
						|
commentline = many spacenonewline >> commentchar >> restofline >> return () <?> "comment line"
 | 
						|
 | 
						|
blankorcommentline = choice' [blankline, commentline]
 | 
						|
 | 
						|
matchreplacepattern = do
 | 
						|
  notFollowedBy commentchar
 | 
						|
  matchpat <- many1 (noneOf "=\n")
 | 
						|
  replpat <- optionMaybe $ do {char '='; many $ noneOf "\n"}
 | 
						|
  newline
 | 
						|
  return (matchpat,replpat)
 | 
						|
 | 
						|
printTxn :: Bool -> CsvRules -> CsvRecord -> IO ()
 | 
						|
printTxn debug rules rec = do
 | 
						|
  when debug $ hPrintf stderr "record: %s" (printCSV [rec])
 | 
						|
  putStr $ show $ transactionFromCsvRecord rules rec
 | 
						|
 | 
						|
-- csv record conversion
 | 
						|
 | 
						|
transactionFromCsvRecord :: CsvRules -> CsvRecord -> Transaction
 | 
						|
transactionFromCsvRecord rules fields =
 | 
						|
  let 
 | 
						|
      date = parsedate $ normaliseDate $ maybe "1900/1/1" (atDef "" fields) (dateField rules)
 | 
						|
      status = maybe False (null . strip . (atDef "" fields)) (statusField rules)
 | 
						|
      code = maybe "" (atDef "" fields) (codeField rules)
 | 
						|
      desc = maybe "" (atDef "" fields) (descriptionField rules)
 | 
						|
      comment = ""
 | 
						|
      precomment = ""
 | 
						|
      amountstr = maybe "" (atDef "" fields) (amountField rules)
 | 
						|
      amountstr' = strnegate amountstr where strnegate ('-':s) = s
 | 
						|
                                             strnegate s = '-':s
 | 
						|
      currency = maybe (fromMaybe "" $ baseCurrency rules) (atDef "" fields) (currencyField rules)
 | 
						|
      amountstr'' = currency ++ amountstr'
 | 
						|
      amountparse = runParser someamount emptyCtx "" amountstr''
 | 
						|
      amount = either (const nullmixedamt) id amountparse
 | 
						|
      unknownacct | (readDef 0 amountstr' :: Double) < 0 = "income:unknown"
 | 
						|
                  | otherwise = "expenses:unknown"
 | 
						|
      (acct,newdesc) = identify (accountRules rules) unknownacct desc
 | 
						|
      t = Transaction {
 | 
						|
              tdate=date,
 | 
						|
              teffectivedate=Nothing,
 | 
						|
              tstatus=status,
 | 
						|
              tcode=code,
 | 
						|
              tdescription=newdesc,
 | 
						|
              tcomment=comment,
 | 
						|
              tpreceding_comment_lines=precomment,
 | 
						|
              tpostings=[
 | 
						|
                   Posting {
 | 
						|
                     pstatus=False,
 | 
						|
                     paccount=acct,
 | 
						|
                     pamount=amount,
 | 
						|
                     pcomment="",
 | 
						|
                     ptype=RegularPosting,
 | 
						|
                     ptransaction=Just t
 | 
						|
                   },
 | 
						|
                   Posting {
 | 
						|
                     pstatus=False,
 | 
						|
                     paccount=baseAccount rules,
 | 
						|
                     pamount=(-amount),
 | 
						|
                     pcomment="",
 | 
						|
                     ptype=RegularPosting,
 | 
						|
                     ptransaction=Just t
 | 
						|
                   }
 | 
						|
                  ]
 | 
						|
            }
 | 
						|
  in t
 | 
						|
 | 
						|
-- | Convert some date string with unknown format to YYYY/MM/DD.
 | 
						|
normaliseDate :: String -> String
 | 
						|
normaliseDate s = maybe "0000/00/00" showDate $
 | 
						|
              firstJust
 | 
						|
              [parseTime defaultTimeLocale "%Y/%m/%e" s
 | 
						|
               -- can't parse a month without leading 0, try adding one
 | 
						|
              ,parseTime defaultTimeLocale "%Y/%m/%e" (take 5 s ++ "0" ++ drop 5 s)
 | 
						|
              ,parseTime defaultTimeLocale "%Y-%m-%e" s
 | 
						|
              ,parseTime defaultTimeLocale "%Y-%m-%e" (take 5 s ++ "0" ++ drop 5 s)
 | 
						|
              ,parseTime defaultTimeLocale "%m/%e/%Y" s
 | 
						|
              ,parseTime defaultTimeLocale "%m/%e/%Y" ('0':s)
 | 
						|
              ,parseTime defaultTimeLocale "%m-%e-%Y" s
 | 
						|
              ,parseTime defaultTimeLocale "%m-%e-%Y" ('0':s)
 | 
						|
              ]
 | 
						|
 | 
						|
-- | Apply account matching rules to a transaction description to obtain
 | 
						|
-- the most appropriate account and a new description.
 | 
						|
identify :: [AccountRule] -> String -> String -> (String,String)
 | 
						|
identify rules defacct desc | null matchingrules = (defacct,desc)
 | 
						|
                            | otherwise = (acct,newdesc)
 | 
						|
    where
 | 
						|
      matchingrules = filter ismatch rules :: [AccountRule]
 | 
						|
          where ismatch = any (isJust . flip matchRegexPR (caseinsensitive desc) . fst) . fst
 | 
						|
      (prs,acct) = head matchingrules
 | 
						|
      p_ms_r = filter (\(_,m,_) -> isJust m) $ map (\(p,r) -> (p, matchRegexPR (caseinsensitive p) desc, r)) prs
 | 
						|
      (p,_,r) = head p_ms_r
 | 
						|
      newdesc = case r of Just rpat -> gsubRegexPR (caseinsensitive p) rpat desc
 | 
						|
                          Nothing   -> desc
 | 
						|
 | 
						|
caseinsensitive = ("(?i)"++)
 | 
						|
 | 
						|
tests_Convert = TestList [
 | 
						|
 | 
						|
   "convert rules parsing: empty file" ~: do
 | 
						|
     -- let assertMixedAmountParse parseresult mixedamount =
 | 
						|
     --         (either (const "parse error") showMixedAmountDebug parseresult) ~?= (showMixedAmountDebug mixedamount)
 | 
						|
    assertParseEqual (parseCsvRules "unknown" "") nullrules
 | 
						|
 | 
						|
  ,"convert rules parsing: accountrule" ~: do
 | 
						|
     assertParseEqual (parseWithCtx nullrules accountrule "A\na\n") -- leading blank line required
 | 
						|
                 ([("A",Nothing)], "a")
 | 
						|
 | 
						|
  ,"convert rules parsing: trailing comments" ~: do
 | 
						|
     assertParse (parseWithCtx nullrules csvrulesfile "A\na\n# \n#\n")
 | 
						|
 | 
						|
  ,"convert rules parsing: trailing blank lines" ~: do
 | 
						|
     assertParse (parseWithCtx nullrules csvrulesfile "A\na\n\n  \n")
 | 
						|
 | 
						|
  -- not supported
 | 
						|
  -- ,"convert rules parsing: no final newline" ~: do
 | 
						|
  --    assertParse (parseWithCtx nullrules csvrulesfile "A\na")
 | 
						|
  --    assertParse (parseWithCtx nullrules csvrulesfile "A\na\n# \n#")
 | 
						|
  --    assertParse (parseWithCtx nullrules csvrulesfile "A\na\n\n  ")
 | 
						|
 | 
						|
                 -- (nullrules{
 | 
						|
                 --   -- dateField=Maybe FieldPosition,
 | 
						|
                 --   -- statusField=Maybe FieldPosition,
 | 
						|
                 --   -- codeField=Maybe FieldPosition,
 | 
						|
                 --   -- descriptionField=Maybe FieldPosition,
 | 
						|
                 --   -- amountField=Maybe FieldPosition,
 | 
						|
                 --   -- currencyField=Maybe FieldPosition,
 | 
						|
                 --   -- baseCurrency=Maybe String,
 | 
						|
                 --   -- baseAccount=AccountName,
 | 
						|
                 --   accountRules=[
 | 
						|
                 --        ([("A",Nothing)], "a")
 | 
						|
                 --       ]
 | 
						|
                 --  })
 | 
						|
 | 
						|
  ] |