386 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Haskell
		
	
	
	
	
	
			
		
		
	
	
			386 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Haskell
		
	
	
	
	
	
| {-|
 | |
| Convert account data in CSV format (eg downloaded from a bank) to ledger
 | |
| format, and print it on stdout. See the manual for more details.
 | |
| -}
 | |
| 
 | |
| module Hledger.Cli.Commands.Convert where
 | |
| import Hledger.Cli.Options (Opt(Debug))
 | |
| import Hledger.Cli.Version (versionstr)
 | |
| import Hledger.Data.Types (Ledger,AccountName,Transaction(..),Posting(..),PostingType(..))
 | |
| import Hledger.Data.Utils (strip, spacenonewline, restofline, parseWithCtx, assertParse, assertParseEqual)
 | |
| import Hledger.Data.Parse (someamount, emptyCtx, ledgeraccountname)
 | |
| import Hledger.Data.Amount (nullmixedamt)
 | |
| import Safe (atDef, maximumDef)
 | |
| import System.IO (stderr)
 | |
| import Text.CSV (parseCSVFromFile, printCSV)
 | |
| import Text.Printf (hPrintf)
 | |
| import Text.RegexPR (matchRegexPR, gsubRegexPR)
 | |
| import Data.Maybe
 | |
| import Hledger.Data.Dates (firstJust, showDate, parsedate)
 | |
| import System.Locale (defaultTimeLocale)
 | |
| import Data.Time.Format (parseTime)
 | |
| import Control.Monad (when, guard, liftM)
 | |
| import Safe (readDef, readMay)
 | |
| import System.Directory (doesFileExist)
 | |
| import System.Exit (exitFailure)
 | |
| import System.FilePath.Posix (takeBaseName, replaceExtension)
 | |
| import Text.ParserCombinators.Parsec
 | |
| import Test.HUnit
 | |
| 
 | |
| 
 | |
| {- |
 | |
| A set of data definitions and account-matching patterns sufficient to
 | |
| convert a particular CSV data file into meaningful ledger transactions. See above.
 | |
| -}
 | |
| data CsvRules = CsvRules {
 | |
|       dateField :: Maybe FieldPosition,
 | |
|       statusField :: Maybe FieldPosition,
 | |
|       codeField :: Maybe FieldPosition,
 | |
|       descriptionField :: Maybe FieldPosition,
 | |
|       amountField :: Maybe FieldPosition,
 | |
|       currencyField :: Maybe FieldPosition,
 | |
|       baseCurrency :: Maybe String,
 | |
|       baseAccount :: AccountName,
 | |
|       accountRules :: [AccountRule]
 | |
| } deriving (Show, Eq)
 | |
| 
 | |
| nullrules = CsvRules {
 | |
|       dateField=Nothing,
 | |
|       statusField=Nothing,
 | |
|       codeField=Nothing,
 | |
|       descriptionField=Nothing,
 | |
|       amountField=Nothing,
 | |
|       currencyField=Nothing,
 | |
|       baseCurrency=Nothing,
 | |
|       baseAccount="unknown",
 | |
|       accountRules=[]
 | |
| }
 | |
| 
 | |
| type FieldPosition = Int
 | |
| 
 | |
| type AccountRule = (
 | |
|    [(String, Maybe String)] -- list of regex match patterns with optional replacements
 | |
|   ,AccountName              -- account name to use for a transaction matching this rule
 | |
|   )
 | |
| 
 | |
| type CsvRecord = [String]
 | |
| 
 | |
| 
 | |
| -- | Read the CSV file named as an argument and print equivalent ledger transactions,
 | |
| -- using/creating a .rules file.
 | |
| convert :: [Opt] -> [String] -> Ledger -> IO ()
 | |
| convert opts args _ = do
 | |
|   when (null args) $ error "please specify a csv data file."
 | |
|   let csvfile = head args
 | |
|   csvparse <- parseCSVFromFile csvfile
 | |
|   let records = case csvparse of
 | |
|                   Left e -> error $ show e
 | |
|                   Right rs -> reverse $ filter (/= [""]) rs
 | |
|   let debug = Debug `elem` opts
 | |
|       rulesfile = rulesFileFor csvfile
 | |
|   exists <- doesFileExist rulesfile
 | |
|   if (not exists) then do
 | |
|                   hPrintf stderr "creating conversion rules file %s, edit this file for better results\n" rulesfile
 | |
|                   writeFile rulesfile initialRulesFileContent
 | |
|    else
 | |
|       hPrintf stderr "using conversion rules file %s\n" rulesfile
 | |
|   rules <- liftM (either (error.show) id) $ parseCsvRulesFile rulesfile
 | |
|   when debug $ hPrintf stderr "rules: %s\n" (show rules)
 | |
|   let requiredfields = max 2 (maxFieldIndex rules + 1)
 | |
|       badrecords = take 1 $ filter ((< requiredfields).length) records
 | |
|   if null badrecords
 | |
|    then mapM_ (printTxn debug rules) records
 | |
|    else do
 | |
|      hPrintf stderr (unlines [
 | |
|                       "Warning, at least one CSV record does not contain a field referenced by the"
 | |
|                      ,"conversion rules file, or has less than two fields. Are you converting a"
 | |
|                      ,"valid CSV file ? First bad record:\n%s"
 | |
|                      ]) (show $ head badrecords)
 | |
|      exitFailure
 | |
| 
 | |
| -- | The highest (0-based) field index referenced in the field
 | |
| -- definitions, or -1 if no fields are defined.
 | |
| maxFieldIndex :: CsvRules -> Int
 | |
| maxFieldIndex r = maximumDef (-1) $ catMaybes [
 | |
|                    dateField r
 | |
|                   ,statusField r
 | |
|                   ,codeField r
 | |
|                   ,descriptionField r
 | |
|                   ,amountField r
 | |
|                   ,currencyField r
 | |
|                   ]
 | |
| 
 | |
| rulesFileFor :: FilePath -> FilePath
 | |
| rulesFileFor csvfile = replaceExtension csvfile ".rules"
 | |
| 
 | |
| initialRulesFileContent :: String
 | |
| initialRulesFileContent =
 | |
|     "# csv conversion rules file generated by hledger "++versionstr++"\n" ++
 | |
|     "# Add rules to this file for more accurate conversion, see\n"++
 | |
|     "# http://hledger.org/MANUAL.html#convert\n" ++
 | |
|     "\n" ++
 | |
|     "base-account assets:bank:checking\n" ++
 | |
|     "date-field 0\n" ++
 | |
|     "description-field 4\n" ++
 | |
|     "amount-field 1\n" ++
 | |
|     "currency $\n" ++
 | |
|     "\n" ++
 | |
|     "# account-assigning rules\n" ++
 | |
|     "\n" ++
 | |
|     "SPECTRUM\n" ++
 | |
|     "expenses:health:gym\n" ++
 | |
|     "\n" ++
 | |
|     "ITUNES\n" ++
 | |
|     "BLKBSTR=BLOCKBUSTER\n" ++
 | |
|     "expenses:entertainment\n" ++
 | |
|     "\n" ++
 | |
|     "(TO|FROM) SAVINGS\n" ++
 | |
|     "assets:bank:savings\n"
 | |
| 
 | |
| -- rules file parser
 | |
| 
 | |
| parseCsvRulesFile :: FilePath -> IO (Either ParseError CsvRules)
 | |
| parseCsvRulesFile f = do
 | |
|   s <- readFile f
 | |
|   return $ parseCsvRules f s
 | |
| 
 | |
| parseCsvRules :: FilePath -> String -> Either ParseError CsvRules
 | |
| parseCsvRules rulesfile s = runParser csvrulesfile nullrules{baseAccount=takeBaseName rulesfile} rulesfile s
 | |
| 
 | |
| csvrulesfile :: GenParser Char CsvRules CsvRules
 | |
| csvrulesfile = do
 | |
|   many blankorcommentline
 | |
|   many definitions
 | |
|   r <- getState
 | |
|   ars <- many accountrule
 | |
|   many blankorcommentline
 | |
|   eof
 | |
|   return r{accountRules=ars}
 | |
| 
 | |
| -- | Real independent parser choice, even when alternative matches share a prefix.
 | |
| choice' parsers = choice $ map try (init parsers) ++ [last parsers]
 | |
| 
 | |
| definitions :: GenParser Char CsvRules ()
 | |
| definitions = do
 | |
|   choice' [
 | |
|     datefield
 | |
|    ,statusfield
 | |
|    ,codefield
 | |
|    ,descriptionfield
 | |
|    ,amountfield
 | |
|    ,currencyfield
 | |
|    ,basecurrency
 | |
|    ,baseaccount
 | |
|    ,commentline
 | |
|    ] <?> "definition"
 | |
|   return ()
 | |
| 
 | |
| datefield = do
 | |
|   string "date-field"
 | |
|   many1 spacenonewline
 | |
|   v <- restofline
 | |
|   r <- getState
 | |
|   setState r{dateField=readMay v}
 | |
| 
 | |
| codefield = do
 | |
|   string "code-field"
 | |
|   many1 spacenonewline
 | |
|   v <- restofline
 | |
|   r <- getState
 | |
|   setState r{codeField=readMay v}
 | |
| 
 | |
| statusfield = do
 | |
|   string "status-field"
 | |
|   many1 spacenonewline
 | |
|   v <- restofline
 | |
|   r <- getState
 | |
|   setState r{statusField=readMay v}
 | |
| 
 | |
| descriptionfield = do
 | |
|   string "description-field"
 | |
|   many1 spacenonewline
 | |
|   v <- restofline
 | |
|   r <- getState
 | |
|   setState r{descriptionField=readMay v}
 | |
| 
 | |
| amountfield = do
 | |
|   string "amount-field"
 | |
|   many1 spacenonewline
 | |
|   v <- restofline
 | |
|   r <- getState
 | |
|   setState r{amountField=readMay v}
 | |
| 
 | |
| currencyfield = do
 | |
|   string "currency-field"
 | |
|   many1 spacenonewline
 | |
|   v <- restofline
 | |
|   r <- getState
 | |
|   setState r{currencyField=readMay v}
 | |
| 
 | |
| basecurrency = do
 | |
|   string "currency"
 | |
|   many1 spacenonewline
 | |
|   v <- restofline
 | |
|   r <- getState
 | |
|   setState r{baseCurrency=Just v}
 | |
| 
 | |
| baseaccount = do
 | |
|   string "base-account"
 | |
|   many1 spacenonewline
 | |
|   v <- ledgeraccountname
 | |
|   optional newline
 | |
|   r <- getState
 | |
|   setState r{baseAccount=v}
 | |
| 
 | |
| accountrule :: GenParser Char CsvRules AccountRule
 | |
| accountrule = do
 | |
|   many blankorcommentline
 | |
|   pats <- many1 matchreplacepattern
 | |
|   guard $ length pats >= 2
 | |
|   let pats' = init pats
 | |
|       acct = either (fail.show) id $ runParser ledgeraccountname () "" $ fst $ last pats
 | |
|   many blankorcommentline
 | |
|   return (pats',acct)
 | |
|  <?> "account rule"
 | |
| 
 | |
| blanklines = many1 blankline >> return ()
 | |
| 
 | |
| blankline = many spacenonewline >> newline >> return () <?> "blank line"
 | |
| 
 | |
| commentchar = oneOf ";#"
 | |
| 
 | |
| commentline = many spacenonewline >> commentchar >> restofline >> return () <?> "comment line"
 | |
| 
 | |
| blankorcommentline = choice' [blankline, commentline]
 | |
| 
 | |
| matchreplacepattern = do
 | |
|   notFollowedBy commentchar
 | |
|   matchpat <- many1 (noneOf "=\n")
 | |
|   replpat <- optionMaybe $ do {char '='; many $ noneOf "\n"}
 | |
|   newline
 | |
|   return (matchpat,replpat)
 | |
| 
 | |
| printTxn :: Bool -> CsvRules -> CsvRecord -> IO ()
 | |
| printTxn debug rules rec = do
 | |
|   when debug $ hPrintf stderr "record: %s" (printCSV [rec])
 | |
|   putStr $ show $ transactionFromCsvRecord rules rec
 | |
| 
 | |
| -- csv record conversion
 | |
| 
 | |
| transactionFromCsvRecord :: CsvRules -> CsvRecord -> Transaction
 | |
| transactionFromCsvRecord rules fields =
 | |
|   let 
 | |
|       date = parsedate $ normaliseDate $ maybe "1900/1/1" (atDef "" fields) (dateField rules)
 | |
|       status = maybe False (null . strip . (atDef "" fields)) (statusField rules)
 | |
|       code = maybe "" (atDef "" fields) (codeField rules)
 | |
|       desc = maybe "" (atDef "" fields) (descriptionField rules)
 | |
|       comment = ""
 | |
|       precomment = ""
 | |
|       amountstr = maybe "" (atDef "" fields) (amountField rules)
 | |
|       amountstr' = strnegate amountstr where strnegate ('-':s) = s
 | |
|                                              strnegate s = '-':s
 | |
|       currency = maybe (fromMaybe "" $ baseCurrency rules) (atDef "" fields) (currencyField rules)
 | |
|       amountstr'' = currency ++ amountstr'
 | |
|       amountparse = runParser someamount emptyCtx "" amountstr''
 | |
|       amount = either (const nullmixedamt) id amountparse
 | |
|       unknownacct | (readDef 0 amountstr' :: Double) < 0 = "income:unknown"
 | |
|                   | otherwise = "expenses:unknown"
 | |
|       (acct,newdesc) = identify (accountRules rules) unknownacct desc
 | |
|       t = Transaction {
 | |
|               tdate=date,
 | |
|               teffectivedate=Nothing,
 | |
|               tstatus=status,
 | |
|               tcode=code,
 | |
|               tdescription=newdesc,
 | |
|               tcomment=comment,
 | |
|               tpreceding_comment_lines=precomment,
 | |
|               tpostings=[
 | |
|                    Posting {
 | |
|                      pstatus=False,
 | |
|                      paccount=acct,
 | |
|                      pamount=amount,
 | |
|                      pcomment="",
 | |
|                      ptype=RegularPosting,
 | |
|                      ptransaction=Just t
 | |
|                    },
 | |
|                    Posting {
 | |
|                      pstatus=False,
 | |
|                      paccount=baseAccount rules,
 | |
|                      pamount=(-amount),
 | |
|                      pcomment="",
 | |
|                      ptype=RegularPosting,
 | |
|                      ptransaction=Just t
 | |
|                    }
 | |
|                   ]
 | |
|             }
 | |
|   in t
 | |
| 
 | |
| -- | Convert some date string with unknown format to YYYY/MM/DD.
 | |
| normaliseDate :: String -> String
 | |
| normaliseDate s = maybe "0000/00/00" showDate $
 | |
|               firstJust
 | |
|               [parseTime defaultTimeLocale "%Y/%m/%e" s
 | |
|                -- can't parse a month without leading 0, try adding one
 | |
|               ,parseTime defaultTimeLocale "%Y/%m/%e" (take 5 s ++ "0" ++ drop 5 s)
 | |
|               ,parseTime defaultTimeLocale "%Y-%m-%e" s
 | |
|               ,parseTime defaultTimeLocale "%Y-%m-%e" (take 5 s ++ "0" ++ drop 5 s)
 | |
|               ,parseTime defaultTimeLocale "%m/%e/%Y" s
 | |
|               ,parseTime defaultTimeLocale "%m/%e/%Y" ('0':s)
 | |
|               ,parseTime defaultTimeLocale "%m-%e-%Y" s
 | |
|               ,parseTime defaultTimeLocale "%m-%e-%Y" ('0':s)
 | |
|               ]
 | |
| 
 | |
| -- | Apply account matching rules to a transaction description to obtain
 | |
| -- the most appropriate account and a new description.
 | |
| identify :: [AccountRule] -> String -> String -> (String,String)
 | |
| identify rules defacct desc | null matchingrules = (defacct,desc)
 | |
|                             | otherwise = (acct,newdesc)
 | |
|     where
 | |
|       matchingrules = filter ismatch rules :: [AccountRule]
 | |
|           where ismatch = any (isJust . flip matchRegexPR (caseinsensitive desc) . fst) . fst
 | |
|       (prs,acct) = head matchingrules
 | |
|       p_ms_r = filter (\(_,m,_) -> isJust m) $ map (\(p,r) -> (p, matchRegexPR (caseinsensitive p) desc, r)) prs
 | |
|       (p,_,r) = head p_ms_r
 | |
|       newdesc = case r of Just rpat -> gsubRegexPR (caseinsensitive p) rpat desc
 | |
|                           Nothing   -> desc
 | |
| 
 | |
| caseinsensitive = ("(?i)"++)
 | |
| 
 | |
| tests_Convert = TestList [
 | |
| 
 | |
|    "convert rules parsing: empty file" ~: do
 | |
|      -- let assertMixedAmountParse parseresult mixedamount =
 | |
|      --         (either (const "parse error") showMixedAmountDebug parseresult) ~?= (showMixedAmountDebug mixedamount)
 | |
|     assertParseEqual (parseCsvRules "unknown" "") nullrules
 | |
| 
 | |
|   ,"convert rules parsing: accountrule" ~: do
 | |
|      assertParseEqual (parseWithCtx nullrules accountrule "A\na\n") -- leading blank line required
 | |
|                  ([("A",Nothing)], "a")
 | |
| 
 | |
|   ,"convert rules parsing: trailing comments" ~: do
 | |
|      assertParse (parseWithCtx nullrules csvrulesfile "A\na\n# \n#\n")
 | |
| 
 | |
|   ,"convert rules parsing: trailing blank lines" ~: do
 | |
|      assertParse (parseWithCtx nullrules csvrulesfile "A\na\n\n  \n")
 | |
| 
 | |
|   -- not supported
 | |
|   -- ,"convert rules parsing: no final newline" ~: do
 | |
|   --    assertParse (parseWithCtx nullrules csvrulesfile "A\na")
 | |
|   --    assertParse (parseWithCtx nullrules csvrulesfile "A\na\n# \n#")
 | |
|   --    assertParse (parseWithCtx nullrules csvrulesfile "A\na\n\n  ")
 | |
| 
 | |
|                  -- (nullrules{
 | |
|                  --   -- dateField=Maybe FieldPosition,
 | |
|                  --   -- statusField=Maybe FieldPosition,
 | |
|                  --   -- codeField=Maybe FieldPosition,
 | |
|                  --   -- descriptionField=Maybe FieldPosition,
 | |
|                  --   -- amountField=Maybe FieldPosition,
 | |
|                  --   -- currencyField=Maybe FieldPosition,
 | |
|                  --   -- baseCurrency=Maybe String,
 | |
|                  --   -- baseAccount=AccountName,
 | |
|                  --   accountRules=[
 | |
|                  --        ([("A",Nothing)], "a")
 | |
|                  --       ]
 | |
|                  --  })
 | |
| 
 | |
|   ] |