imp:print:beancount: conversion improvements

- omit balance assertions
- replace more currency symbols, and match within symbols like C$
- do more account validation, and error if conversion is too hard
- backslash-escape double quotes and backslashes in payee and note
This commit is contained in:
Simon Michael 2023-11-23 01:47:15 -10:00
parent 0b8e920810
commit 4b13af0634
5 changed files with 64 additions and 19 deletions

View File

@ -73,6 +73,7 @@ import Text.DocLayout (realLength)
import Hledger.Data.Types
import Hledger.Utils
import Data.Char (isDigit, isLetter)
-- $setup
-- >>> :set -XOverloadedStrings
@ -349,18 +350,46 @@ accountNameToAccountOnlyRegexCI a = toRegexCI' $ "^" <> escapeName a <> "$" -- P
--isAccountRegex s = take 1 s == "^" && take 5 (reverse s) == ")$|:("
type BeancountAccountName = AccountName
type BeancountAccountNameComponent = AccountName
-- Convert a hledger account name to a valid Beancount account name.
-- It capitalises each part, and if the first part is not one of
-- Assets, Liabilities, Equity, Income, Expenses, it prepends Equity:.
-- It replaces non-supported characters with @-@ (warning: in extreme cases
-- separate accounts could end up with the same name), and it capitalises
-- each account name part. It also checks that the first part is one of
-- Assets, Liabilities, Equity, Income, or Expenses, and if not it raises an error.
-- Account aliases (eg --alias) should be used to set these required
-- top-level account names if needed.
accountNameToBeancount :: AccountName -> BeancountAccountName
accountNameToBeancount a =
-- https://beancount.github.io/docs/beancount_language_syntax.html#accounts
accountNameFromComponents $
case map textCapitalise $ accountNameComponents a of
[] -> []
c:cs | c `elem` beancountTopLevelAccounts -> c:cs
cs -> "Equity" : cs
case map (accountNameComponentToBeancount a) $ accountNameComponents a of
c:_ | c `notElem` beancountTopLevelAccounts -> error' e
where
e = T.unpack $ T.unlines [
beancountAccountErrorMessage a,
"For Beancount output, all top-level accounts must be (or be aliased to) one of",
T.intercalate ", " beancountTopLevelAccounts <> "."
]
cs -> cs
-- | Convert a single part of a hledger account name to a Beancount-compatible part.
--
-- The first argument is the full account name, used only for the error message;
-- the second is the part being converted.
-- If the part's first character is not a letter, an error is raised.
-- Otherwise every character that is not accepted by 'isBeancountAccountChar'
-- is replaced with @-@ and the result is passed through 'textCapitalise'.
-- An empty part has no first character to check, so it skips the letter test.
accountNameComponentToBeancount :: AccountName -> AccountName -> BeancountAccountNameComponent
accountNameComponentToBeancount acct part
  | Just (firstChar, _) <- T.uncons part
  , not (isLetter firstChar) = error' errMsg
  | otherwise                = textCapitalise sanitised
  where
    -- The part with each unsupported character swapped for a hyphen.
    sanitised = T.map replaceUnsupported part
    replaceUnsupported ch
      | isBeancountAccountChar ch = ch
      | otherwise                 = '-'
    errMsg = unlines
      [ T.unpack $ beancountAccountErrorMessage acct
      , "For Beancount output, each account name part must begin with a letter."
      ]
-- | The first line of the error shown when an account name can not be
-- converted for Beancount output. The offending account name is quoted
-- inside the message.
beancountAccountErrorMessage :: AccountName -> Text
beancountAccountErrorMessage a =
  T.concat ["Could not convert \"", a, "\" to a Beancount account name."]
-- | Is this character kept as-is when converting an account name for Beancount ?
-- True for letters, digits, the hyphen and the colon; False for everything else.
isBeancountAccountChar :: Char -> Bool
isBeancountAccountChar c = isLetter c || isDigit c || c == '-' || c == ':'
-- | The names allowed as the first part of an account name in Beancount output.
-- 'accountNameToBeancount' raises an error for any account whose first part
-- is not in this list.
beancountTopLevelAccounts :: [AccountName]
beancountTopLevelAccounts = ["Assets", "Liabilities", "Equity", "Income", "Expenses"]

View File

@ -342,10 +342,9 @@ postingAsLinesBeancount elideamount acctwidth amtwidth p =
render [ textCell BottomLeft statusandaccount
, textCell BottomLeft " "
, Cell BottomLeft [pad amt]
, Cell BottomLeft [assertion]
, textCell BottomLeft samelinecomment
]
| (amt,assertion) <- shownAmountsAssertions]
| (amt,_assertion) <- shownAmountsAssertions]
render = renderRow def{tableBorders=False, borderSpaces=False} . Group NoLine . map Header
pad amt = WideBuilder (TB.fromText $ T.replicate w " ") w <> amt
where w = max 12 amtwidth - wbWidth amt -- min. 12 for backwards compatibility
@ -384,13 +383,17 @@ type BeancountAmount = Amount
-- | Do some best effort adjustments to make an amount that renders
-- in a way that Beancount can read: forces the commodity symbol to the right,
-- converts $ to USD.
-- converts a few currency symbols to names, capitalises all letters.
amountToBeancount :: Amount -> BeancountAmount
amountToBeancount a@Amount{acommodity=c,astyle=s,aprice=mp} = a{acommodity=c', astyle=s', aprice=mp'}
-- https://beancount.github.io/docs/beancount_language_syntax.html#commodities-currencies
where
c' | c=="$" = "USD"
| otherwise = c
c' = T.toUpper $
T.replace "$" "USD" $
T.replace "€" "EUR" $
T.replace "¥" "JPY" $
T.replace "£" "GBP" $
c
s' = s{ascommodityside=R, ascommodityspaced=True}
mp' = costToBeancount <$> mp
where

View File

@ -193,11 +193,11 @@ showTransactionBeancount t =
(payee,note) =
case payeeAndNoteFromDescription' $ tdescription t of
("","") -> ("", "" )
(p ,"") -> (wrapq p, wrapq "")
("",n ) -> ("" , wrapq n )
(p ,"") -> (wrapq p, wrapq "")
(p ,n ) -> (wrapq p, wrapq n )
where
wrapq = wrap " \"" "\""
wrapq = wrap " \"" "\"" . escapeDoubleQuotes . escapeBackslash
tags = T.concat $ map ((" #"<>).fst) $ ttags t
(samelinecomment, newlinecomments) =
case renderCommentLines (tcomment t) of [] -> ("",[])

View File

@ -19,6 +19,7 @@ module Hledger.Utils.Text
-- quotechars,
-- whitespacechars,
escapeDoubleQuotes,
escapeBackslash,
-- escapeSingleQuotes,
-- escapeQuotes,
-- words',
@ -139,6 +140,9 @@ whitespacechars = " \t\n\r"
-- | Put a backslash in front of every double quote in the text.
-- All other characters are left unchanged.
escapeDoubleQuotes :: T.Text -> T.Text
escapeDoubleQuotes = T.concatMap escapeChar
  where
    escapeChar '"' = T.pack "\\\""
    escapeChar ch  = T.singleton ch
-- | Double every backslash in the text.
-- All other characters are left unchanged.
escapeBackslash :: T.Text -> T.Text
escapeBackslash = T.concatMap escapeChar
  where
    escapeChar '\\' = T.pack "\\\\"
    escapeChar ch   = T.singleton ch
-- escapeSingleQuotes :: T.Text -> T.Text
-- escapeSingleQuotes = T.replace "'" "\'"

View File

@ -118,17 +118,26 @@ The output formats supported are
`txt`, `beancount`, `csv`, `tsv`, `json` and `sql`.
*Experimental:*
The `beancount` format tries to produce Beancount-compatible output.
It is very basic and may require additional manual fixups:
The `beancount` format tries to produce Beancount-compatible output, as follows:
- Transactions and postings with unmarked status are converted to cleared (`*`) status.
- Transactions' payee and note are wrapped in double quotes.
- Transactions' payee and note are backslash-escaped and double-quote-escaped and wrapped in double quotes.
- Transaction tags are copied to Beancount #tag format.
- Account name parts are capitalised, and if the first account name part
is not one of Assets, Liabilities, Equity, Income, or Expenses, "Equity:" is prepended.
- The `$` commodity symbol is converted to `USD`.
- Commodity symbols are converted to upper case, and a small number of currency symbols
like `$` are converted to the corresponding currency names.
- Account name parts are capitalised and unsupported characters are replaced with `-`.
If an account name part does not begin with a letter, or if the first part
is not Assets, Liabilities, Equity, Income, or Expenses, an error is raised.
(Use `--alias` options to bring your accounts into compliance.)
- An `open` directive is generated for each account used, on the earliest transaction date.
Some limitations:
- Balance assertions are removed.
- Balance assignments become missing amounts.
- Virtual and balanced virtual postings become regular postings.
- Directives are not converted.
Here's an example of print's CSV output:
```shell