518 lines
18 KiB
Haskell
518 lines
18 KiB
Haskell
{-|
|
|
Convert account data in CSV format (eg downloaded from a bank) to journal
|
|
format, and print it on stdout. See the manual for more details.
|
|
-}
|
|
|
|
module Hledger.Cli.Convert where
|
|
import Control.Monad (when, guard, liftM)
|
|
import Data.List
|
|
import Data.Maybe
|
|
import Data.Ord
|
|
import Data.Time.Format (parseTime)
|
|
import Safe
|
|
import System.Directory (doesFileExist)
|
|
import System.Exit (exitFailure)
|
|
import System.FilePath (takeBaseName, replaceExtension)
|
|
import System.IO (stderr)
|
|
import System.Locale (defaultTimeLocale)
|
|
import Test.HUnit
|
|
import Text.CSV (parseCSV, parseCSVFromFile, CSV)
|
|
import Text.ParserCombinators.Parsec
|
|
import Text.Printf (hPrintf)
|
|
|
|
import Prelude hiding (getContents)
|
|
import Hledger.Utils.UTF8 (getContents)
|
|
import Hledger
|
|
import Hledger.Data.FormatStrings
|
|
import qualified Hledger.Data.FormatStrings as Format
|
|
import Hledger.Cli.Options
|
|
import Hledger.Cli.Version
|
|
|
|
nullrules = CsvRules {
|
|
dateField=Nothing,
|
|
dateFormat=Nothing,
|
|
statusField=Nothing,
|
|
codeField=Nothing,
|
|
descriptionField=[],
|
|
amountField=Nothing,
|
|
amountInField=Nothing,
|
|
amountOutField=Nothing,
|
|
currencyField=Nothing,
|
|
baseCurrency=Nothing,
|
|
accountField=Nothing,
|
|
account2Field=Nothing,
|
|
effectiveDateField=Nothing,
|
|
baseAccount="unknown",
|
|
accountRules=[]
|
|
}
|
|
|
|
type CsvRecord = [String]
|
|
|
|
|
|
-- | Read the CSV file named as an argument and print equivalent journal transactions,
|
|
-- using/creating a .rules file.
|
|
convert :: CliOpts -> IO ()
|
|
convert opts = do
|
|
let csvfile = case headDef "" $ patterns_ $ reportopts_ opts of
|
|
"" -> "-"
|
|
s -> s
|
|
usingStdin = csvfile == "-"
|
|
rulesFileSpecified = isJust $ rules_file_ opts
|
|
rulesfile = rulesFileFor opts csvfile
|
|
when (usingStdin && (not rulesFileSpecified)) $ error' "please use --rules-file to specify a rules file when converting stdin"
|
|
csvparse <- parseCsv csvfile
|
|
let records = case csvparse of
|
|
Left e -> error' $ show e
|
|
Right rs -> filter (/= [""]) rs
|
|
exists <- doesFileExist rulesfile
|
|
if (not exists) then do
|
|
hPrintf stderr "creating conversion rules file %s, edit this file for better results\n" rulesfile
|
|
writeFile rulesfile initialRulesFileContent
|
|
else
|
|
hPrintf stderr "using conversion rules file %s\n" rulesfile
|
|
rules <- liftM (either (error'.show) id) $ parseCsvRulesFile rulesfile
|
|
let invalid = validateRules rules
|
|
when (debug_ opts) $ hPrintf stderr "rules: %s\n" (show rules)
|
|
when (isJust invalid) $ error (fromJust invalid)
|
|
let requiredfields = max 2 (maxFieldIndex rules + 1)
|
|
badrecords = take 1 $ filter ((< requiredfields).length) records
|
|
if null badrecords
|
|
then do
|
|
mapM_ (putStr . show) $ sortBy (comparing tdate) $ map (transactionFromCsvRecord rules) records
|
|
else do
|
|
hPrintf stderr (unlines [
|
|
"Warning, at least one CSV record does not contain a field referenced by the"
|
|
,"conversion rules file, or has less than two fields. Are you converting a"
|
|
,"valid CSV file ? First bad record:\n%s"
|
|
]) (show $ head badrecords)
|
|
exitFailure
|
|
|
|
parseCsv :: FilePath -> IO (Either ParseError CSV)
|
|
parseCsv path =
|
|
case path of
|
|
"-" -> liftM (parseCSV "(stdin)") getContents
|
|
p -> parseCSVFromFile p
|
|
|
|
-- | The highest (0-based) field index referenced in the field
|
|
-- definitions, or -1 if no fields are defined.
|
|
maxFieldIndex :: CsvRules -> Int
|
|
maxFieldIndex r = maximumDef (-1) $ catMaybes [
|
|
dateField r
|
|
,statusField r
|
|
,codeField r
|
|
,amountField r
|
|
,amountInField r
|
|
,amountOutField r
|
|
,currencyField r
|
|
,accountField r
|
|
,account2Field r
|
|
,effectiveDateField r
|
|
]
|
|
|
|
rulesFileFor :: CliOpts -> FilePath -> FilePath
|
|
rulesFileFor CliOpts{rules_file_=Just f} _ = f
|
|
rulesFileFor CliOpts{rules_file_=Nothing} csvfile = replaceExtension csvfile ".rules"
|
|
|
|
initialRulesFileContent :: String
|
|
initialRulesFileContent =
|
|
"# csv conversion rules file generated by " ++ prognameandversion ++ "\n" ++
|
|
"# Add rules to this file for more accurate conversion, see\n"++
|
|
"# http://hledger.org/MANUAL.html#convert\n" ++
|
|
"\n" ++
|
|
"base-account assets:bank:checking\n" ++
|
|
"date-field 0\n" ++
|
|
"description-field 4\n" ++
|
|
"amount-field 1\n" ++
|
|
"base-currency $\n" ++
|
|
"\n" ++
|
|
"# account-assigning rules\n" ++
|
|
"\n" ++
|
|
"SPECTRUM\n" ++
|
|
"expenses:health:gym\n" ++
|
|
"\n" ++
|
|
"ITUNES\n" ++
|
|
"BLKBSTR=BLOCKBUSTER\n" ++
|
|
"expenses:entertainment\n" ++
|
|
"\n" ++
|
|
"(TO|FROM) SAVINGS\n" ++
|
|
"assets:bank:savings\n"
|
|
|
|
validateRules :: CsvRules -> Maybe String
|
|
validateRules rules = let
|
|
hasAmount = isJust $ amountField rules
|
|
hasIn = isJust $ amountInField rules
|
|
hasOut = isJust $ amountOutField rules
|
|
in case (hasAmount, hasIn, hasOut) of
|
|
(True, True, _) -> Just "Don't specify amount-in-field when specifying amount-field"
|
|
(True, _, True) -> Just "Don't specify amount-out-field when specifying amount-field"
|
|
(_, False, True) -> Just "Please specify amount-in-field when specifying amount-out-field"
|
|
(_, True, False) -> Just "Please specify amount-out-field when specifying amount-in-field"
|
|
(False, False, False) -> Just "Please specify either amount-field, or amount-in-field and amount-out-field"
|
|
_ -> Nothing
|
|
|
|
-- rules file parser
|
|
|
|
parseCsvRulesFile :: FilePath -> IO (Either ParseError CsvRules)
|
|
parseCsvRulesFile f = do
|
|
s <- readFile f
|
|
return $ parseCsvRules f s
|
|
|
|
parseCsvRules :: FilePath -> String -> Either ParseError CsvRules
|
|
parseCsvRules rulesfile s = runParser csvrulesfile nullrules{baseAccount=takeBaseName rulesfile} rulesfile s
|
|
|
|
csvrulesfile :: GenParser Char CsvRules CsvRules
|
|
csvrulesfile = do
|
|
many blankorcommentline
|
|
many definitions
|
|
r <- getState
|
|
ars <- many accountrule
|
|
many blankorcommentline
|
|
eof
|
|
return r{accountRules=ars}
|
|
|
|
definitions :: GenParser Char CsvRules ()
|
|
definitions = do
|
|
choice' [
|
|
datefield
|
|
,dateformat
|
|
,statusfield
|
|
,codefield
|
|
,descriptionfield
|
|
,amountfield
|
|
,amountinfield
|
|
,amountoutfield
|
|
,currencyfield
|
|
,accountfield
|
|
,account2field
|
|
,effectivedatefield
|
|
,basecurrency
|
|
,baseaccount
|
|
,commentline
|
|
] <?> "definition"
|
|
return ()
|
|
|
|
datefield = do
|
|
string "date-field"
|
|
many1 spacenonewline
|
|
v <- restofline
|
|
updateState (\r -> r{dateField=readMay v})
|
|
|
|
effectivedatefield = do
|
|
string "effective-date-field"
|
|
many1 spacenonewline
|
|
v <- restofline
|
|
updateState (\r -> r{effectiveDateField=readMay v})
|
|
|
|
dateformat = do
|
|
string "date-format"
|
|
many1 spacenonewline
|
|
v <- restofline
|
|
updateState (\r -> r{dateFormat=Just v})
|
|
|
|
codefield = do
|
|
string "code-field"
|
|
many1 spacenonewline
|
|
v <- restofline
|
|
updateState (\r -> r{codeField=readMay v})
|
|
|
|
statusfield = do
|
|
string "status-field"
|
|
many1 spacenonewline
|
|
v <- restofline
|
|
updateState (\r -> r{statusField=readMay v})
|
|
|
|
descriptionFieldValue :: GenParser Char st [FormatString]
|
|
descriptionFieldValue = do
|
|
-- try (fieldNo <* spacenonewline)
|
|
try fieldNo
|
|
<|> formatStrings
|
|
where
|
|
fieldNo = many1 digit >>= \x -> return [FormatField False Nothing Nothing $ FieldNo $ read x]
|
|
|
|
descriptionfield = do
|
|
string "description-field"
|
|
many1 spacenonewline
|
|
formatS <- descriptionFieldValue
|
|
restofline
|
|
updateState (\x -> x{descriptionField=formatS})
|
|
|
|
amountfield = do
|
|
string "amount-field"
|
|
many1 spacenonewline
|
|
v <- restofline
|
|
x <- updateState (\r -> r{amountField=readMay v})
|
|
return x
|
|
|
|
amountinfield = do
|
|
choice [string "amount-in-field", string "in-field"]
|
|
many1 spacenonewline
|
|
v <- restofline
|
|
updateState (\r -> r{amountInField=readMay v})
|
|
|
|
amountoutfield = do
|
|
choice [string "amount-out-field", string "out-field"]
|
|
many1 spacenonewline
|
|
v <- restofline
|
|
updateState (\r -> r{amountOutField=readMay v})
|
|
|
|
currencyfield = do
|
|
string "currency-field"
|
|
many1 spacenonewline
|
|
v <- restofline
|
|
updateState (\r -> r{currencyField=readMay v})
|
|
|
|
accountfield = do
|
|
string "account-field"
|
|
many1 spacenonewline
|
|
v <- restofline
|
|
updateState (\r -> r{accountField=readMay v})
|
|
|
|
account2field = do
|
|
string "account2-field"
|
|
many1 spacenonewline
|
|
v <- restofline
|
|
updateState (\r -> r{account2Field=readMay v})
|
|
|
|
basecurrency = do
|
|
choice [string "base-currency", string "currency"]
|
|
many1 spacenonewline
|
|
v <- restofline
|
|
updateState (\r -> r{baseCurrency=Just v})
|
|
|
|
baseaccount = do
|
|
string "base-account"
|
|
many1 spacenonewline
|
|
v <- ledgeraccountname
|
|
optional newline
|
|
updateState (\r -> r{baseAccount=v})
|
|
|
|
accountrule :: GenParser Char CsvRules AccountRule
|
|
accountrule = do
|
|
many blankorcommentline
|
|
pats <- many1 matchreplacepattern
|
|
guard $ length pats >= 2
|
|
let pats' = init pats
|
|
acct = either (fail.show) id $ runParser ledgeraccountname () "" $ fst $ last pats
|
|
many blankorcommentline
|
|
return (pats',acct)
|
|
<?> "account rule"
|
|
|
|
blanklines = many1 blankline
|
|
|
|
blankline = many spacenonewline >> newline >> return () <?> "blank line"
|
|
|
|
commentchar = oneOf ";#"
|
|
|
|
commentline = many spacenonewline >> commentchar >> restofline >> return () <?> "comment line"
|
|
|
|
blankorcommentline = choice' [blankline, commentline]
|
|
|
|
matchreplacepattern = do
|
|
notFollowedBy commentchar
|
|
matchpat <- many1 (noneOf "=\n")
|
|
replpat <- optionMaybe $ do {char '='; many $ noneOf "\n"}
|
|
newline
|
|
return (matchpat,replpat)
|
|
|
|
-- csv record conversion
|
|
formatD :: CsvRecord -> Bool -> Maybe Int -> Maybe Int -> HledgerFormatField -> String
|
|
formatD record leftJustified min max f = case f of
|
|
FieldNo n -> maybe "" show $ atMay record n
|
|
-- Some of these might in theory in read from fields
|
|
AccountField -> ""
|
|
DepthSpacerField -> ""
|
|
TotalField -> ""
|
|
DefaultDateField -> ""
|
|
DescriptionField -> ""
|
|
where
|
|
show = formatValue leftJustified min max
|
|
|
|
formatDescription :: CsvRecord -> [FormatString] -> String
|
|
formatDescription _ [] = ""
|
|
formatDescription record (f:fs) = s ++ (formatDescription record fs)
|
|
where s = case f of
|
|
FormatLiteral l -> l
|
|
FormatField leftJustified min max field -> formatD record leftJustified min max field
|
|
|
|
transactionFromCsvRecord :: CsvRules -> CsvRecord -> Transaction
|
|
transactionFromCsvRecord rules fields =
|
|
let
|
|
date = parsedate $ normaliseDate (dateFormat rules) $ maybe "1900/1/1" (atDef "" fields) (dateField rules)
|
|
effectivedate = do idx <- effectiveDateField rules
|
|
return $ parsedate $ normaliseDate (dateFormat rules) $ (atDef "" fields) idx
|
|
status = maybe False (null . strip . (atDef "" fields)) (statusField rules)
|
|
code = maybe "" (atDef "" fields) (codeField rules)
|
|
desc = formatDescription fields (descriptionField rules)
|
|
comment = ""
|
|
precomment = ""
|
|
baseacc = maybe (baseAccount rules) (atDef "" fields) (accountField rules)
|
|
amountstr = getAmount rules fields
|
|
amountstr' = strnegate amountstr where strnegate ('-':s) = s
|
|
strnegate s = '-':s
|
|
currency = maybe (fromMaybe "" $ baseCurrency rules) (atDef "" fields) (currencyField rules)
|
|
amountstr'' = currency ++ amountstr'
|
|
amountparse = runParser someamount nullctx "" amountstr''
|
|
amount = either (const nullmixedamt) id amountparse
|
|
-- Using costOfMixedAmount here to allow complex costs like "10 GBP @@ 15 USD".
|
|
-- Aim is to have "10 GBP @@ 15 USD" applied to account "acct", but have "-15USD" applied to "baseacct"
|
|
baseamount = costOfMixedAmount amount
|
|
unknownacct | (readDef 0 amountstr' :: Double) < 0 = "income:unknown"
|
|
| otherwise = "expenses:unknown"
|
|
(acct',newdesc) = identify (accountRules rules) unknownacct desc
|
|
acct = maybe acct' (atDef "" fields) (account2Field rules)
|
|
t = Transaction {
|
|
tdate=date,
|
|
teffectivedate=effectivedate,
|
|
tstatus=status,
|
|
tcode=code,
|
|
tdescription=newdesc,
|
|
tcomment=comment,
|
|
tpreceding_comment_lines=precomment,
|
|
tmetadata=[],
|
|
tpostings=[
|
|
Posting {
|
|
pstatus=False,
|
|
paccount=acct,
|
|
pamount=amount,
|
|
pcomment="",
|
|
ptype=RegularPosting,
|
|
pmetadata=[],
|
|
ptransaction=Just t
|
|
},
|
|
Posting {
|
|
pstatus=False,
|
|
paccount=baseacc,
|
|
pamount=(-baseamount),
|
|
pcomment="",
|
|
ptype=RegularPosting,
|
|
pmetadata=[],
|
|
ptransaction=Just t
|
|
}
|
|
]
|
|
}
|
|
in t
|
|
|
|
-- | Convert some date string with unknown format to YYYY/MM/DD.
|
|
normaliseDate :: Maybe String -- ^ User-supplied date format: this should be tried in preference to all others
|
|
-> String -> String
|
|
normaliseDate mb_user_format s =
|
|
let parsewith = flip (parseTime defaultTimeLocale) s in
|
|
maybe (error' $ "could not parse \""++s++"\" as a date, consider adding a date-format directive or upgrading")
|
|
showDate $
|
|
firstJust $ (map parsewith $
|
|
maybe [] (:[]) mb_user_format
|
|
-- the - modifier requires time-1.2.0.5, released
|
|
-- in 2011/5, so for now we emulate it for wider
|
|
-- compatibility. time < 1.2.0.5 also has a buggy
|
|
-- %y which we don't do anything about.
|
|
-- ++ [
|
|
-- "%Y/%m/%d"
|
|
-- ,"%Y/%-m/%-d"
|
|
-- ,"%Y-%m-%d"
|
|
-- ,"%Y-%-m-%-d"
|
|
-- ,"%m/%d/%Y"
|
|
-- ,"%-m/%-d/%Y"
|
|
-- ,"%m-%d-%Y"
|
|
-- ,"%-m-%-d-%Y"
|
|
-- ]
|
|
)
|
|
++ [
|
|
parseTime defaultTimeLocale "%Y/%m/%e" s
|
|
,parseTime defaultTimeLocale "%Y-%m-%e" s
|
|
,parseTime defaultTimeLocale "%m/%e/%Y" s
|
|
,parseTime defaultTimeLocale "%m-%e-%Y" s
|
|
,parseTime defaultTimeLocale "%Y/%m/%e" (take 5 s ++ "0" ++ drop 5 s)
|
|
,parseTime defaultTimeLocale "%Y-%m-%e" (take 5 s ++ "0" ++ drop 5 s)
|
|
,parseTime defaultTimeLocale "%m/%e/%Y" ('0':s)
|
|
,parseTime defaultTimeLocale "%m-%e-%Y" ('0':s)
|
|
]
|
|
|
|
-- | Apply account matching rules to a transaction description to obtain
|
|
-- the most appropriate account and a new description.
|
|
identify :: [AccountRule] -> String -> String -> (String,String)
|
|
identify rules defacct desc | null matchingrules = (defacct,desc)
|
|
| otherwise = (acct,newdesc)
|
|
where
|
|
matchingrules = filter ismatch rules :: [AccountRule]
|
|
where ismatch = any ((`regexMatchesCI` desc) . fst) . fst
|
|
(prs,acct) = head matchingrules
|
|
p_ms_r = filter (\(_,m,_) -> m) $ map (\(p,r) -> (p, p `regexMatchesCI` desc, r)) prs
|
|
(p,_,r) = head p_ms_r
|
|
newdesc = case r of Just repl -> regexReplaceCI p repl desc
|
|
Nothing -> desc
|
|
|
|
caseinsensitive = ("(?i)"++)
|
|
|
|
getAmount :: CsvRules -> CsvRecord -> String
|
|
getAmount rules fields = case amountField rules of
|
|
Just f -> maybe "" (atDef "" fields) $ Just f
|
|
Nothing ->
|
|
case (i, o) of
|
|
(x, "") -> x
|
|
("", x) -> "-"++x
|
|
p -> error' $ "using amount-in-field and amount-out-field, found a value in both fields: "++show p
|
|
where
|
|
i = maybe "" (atDef "" fields) (amountInField rules)
|
|
o = maybe "" (atDef "" fields) (amountOutField rules)
|
|
|
|
tests_Hledger_Cli_Convert = TestList (test_parser ++ test_description_parsing)
|
|
|
|
test_description_parsing = [
|
|
"description-field 1" ~: assertParseDescription "description-field 1\n" [FormatField False Nothing Nothing (FieldNo 1)]
|
|
, "description-field 1 " ~: assertParseDescription "description-field 1 \n" [FormatField False Nothing Nothing (FieldNo 1)]
|
|
, "description-field %(1)" ~: assertParseDescription "description-field %(1)\n" [FormatField False Nothing Nothing (FieldNo 1)]
|
|
, "description-field %(1)/$(2)" ~: assertParseDescription "description-field %(1)/%(2)\n" [
|
|
FormatField False Nothing Nothing (FieldNo 1)
|
|
, FormatLiteral "/"
|
|
, FormatField False Nothing Nothing (FieldNo 2)
|
|
]
|
|
]
|
|
where
|
|
assertParseDescription string expected = do assertParseEqual (parseDescription string) (nullrules {descriptionField = expected})
|
|
parseDescription :: String -> Either ParseError CsvRules
|
|
parseDescription x = runParser descriptionfieldWrapper nullrules "(unknown)" x
|
|
descriptionfieldWrapper :: GenParser Char CsvRules CsvRules
|
|
descriptionfieldWrapper = do
|
|
descriptionfield
|
|
r <- getState
|
|
return r
|
|
|
|
test_parser = [
|
|
|
|
"convert rules parsing: empty file" ~: do
|
|
-- let assertMixedAmountParse parseresult mixedamount =
|
|
-- (either (const "parse error") showMixedAmountDebug parseresult) ~?= (showMixedAmountDebug mixedamount)
|
|
assertParseEqual (parseCsvRules "unknown" "") nullrules
|
|
|
|
,"convert rules parsing: accountrule" ~: do
|
|
assertParseEqual (parseWithCtx nullrules accountrule "A\na\n") -- leading blank line required
|
|
([("A",Nothing)], "a")
|
|
|
|
,"convert rules parsing: trailing comments" ~: do
|
|
assertParse (parseWithCtx nullrules csvrulesfile "A\na\n# \n#\n")
|
|
|
|
,"convert rules parsing: trailing blank lines" ~: do
|
|
assertParse (parseWithCtx nullrules csvrulesfile "A\na\n\n \n")
|
|
|
|
-- not supported
|
|
-- ,"convert rules parsing: no final newline" ~: do
|
|
-- assertParse (parseWithCtx nullrules csvrulesfile "A\na")
|
|
-- assertParse (parseWithCtx nullrules csvrulesfile "A\na\n# \n#")
|
|
-- assertParse (parseWithCtx nullrules csvrulesfile "A\na\n\n ")
|
|
|
|
-- (nullrules{
|
|
-- -- dateField=Maybe FieldPosition,
|
|
-- -- statusField=Maybe FieldPosition,
|
|
-- -- codeField=Maybe FieldPosition,
|
|
-- -- descriptionField=Maybe FieldPosition,
|
|
-- -- amountField=Maybe FieldPosition,
|
|
-- -- currencyField=Maybe FieldPosition,
|
|
-- -- baseCurrency=Maybe String,
|
|
-- -- baseAccount=AccountName,
|
|
-- accountRules=[
|
|
-- ([("A",Nothing)], "a")
|
|
-- ]
|
|
-- })
|
|
|
|
]
|