imp:print:beancount: conversion improvements

- omit balance assertions
- replace more currency symbols, and match within symbols like C$
- do more account validation, and error if conversion is too hard
- backslash-escape double quotes and backslashes in payee and note
This commit is contained in:
Simon Michael 2023-11-23 01:47:15 -10:00
parent 0b8e920810
commit 4b13af0634
5 changed files with 64 additions and 19 deletions

View File

@ -73,6 +73,7 @@ import Text.DocLayout (realLength)
import Hledger.Data.Types import Hledger.Data.Types
import Hledger.Utils import Hledger.Utils
import Data.Char (isDigit, isLetter)
-- $setup -- $setup
-- >>> :set -XOverloadedStrings -- >>> :set -XOverloadedStrings
@ -349,18 +350,46 @@ accountNameToAccountOnlyRegexCI a = toRegexCI' $ "^" <> escapeName a <> "$" -- P
--isAccountRegex s = take 1 s == "^" && take 5 (reverse s) == ")$|:(" --isAccountRegex s = take 1 s == "^" && take 5 (reverse s) == ")$|:("
type BeancountAccountName = AccountName type BeancountAccountName = AccountName
type BeancountAccountNameComponent = AccountName
-- Convert a hledger account name to a valid Beancount account name. -- Convert a hledger account name to a valid Beancount account name.
-- It capitalises each part, and if the first part is not one of -- It replaces non-supported characters with @-@ (warning: in extreme cases
-- Assets, Liabilities, Equity, Income, Expenses, it prepends Equity:. -- separate accounts could end up with the same name), and it capitalises
-- each account name part. It also checks that the first part is one of
-- Assets, Liabilities, Equity, Income, or Expenses, and if not it raises an error.
-- Account aliases (eg --alias) should be used to set these required
-- top-level account names if needed.
accountNameToBeancount :: AccountName -> BeancountAccountName accountNameToBeancount :: AccountName -> BeancountAccountName
accountNameToBeancount a = accountNameToBeancount a =
-- https://beancount.github.io/docs/beancount_language_syntax.html#accounts -- https://beancount.github.io/docs/beancount_language_syntax.html#accounts
accountNameFromComponents $ accountNameFromComponents $
case map textCapitalise $ accountNameComponents a of case map (accountNameComponentToBeancount a) $ accountNameComponents a of
[] -> [] c:_ | c `notElem` beancountTopLevelAccounts -> error' e
c:cs | c `elem` beancountTopLevelAccounts -> c:cs where
cs -> "Equity" : cs e = T.unpack $ T.unlines [
beancountAccountErrorMessage a,
"For Beancount output, all top-level accounts must be (or be aliased to) one of",
T.intercalate ", " beancountTopLevelAccounts <> "."
]
cs -> cs
-- | Convert a single part of a hledger account name to a Beancount-friendly
-- account name part.
--
-- The first argument is the full account name, used only for the error
-- message; the second is the part being converted.
--
-- If the part is non-empty and its first character is not a letter, an error
-- is raised with 'error''. Otherwise every character rejected by
-- 'isBeancountAccountChar' is replaced with @-@ and the result is passed
-- through 'textCapitalise'. An empty part skips the letter check.
accountNameComponentToBeancount :: AccountName -> AccountName -> BeancountAccountNameComponent
accountNameComponentToBeancount acct part
  | startsWithNonLetter = error' errmsg
  | otherwise           = textCapitalise sanitised
  where
    -- Only a non-empty part can fail the "begins with a letter" check.
    startsWithNonLetter =
      case T.uncons part of
        Just (firstch, _) -> not (isLetter firstch)
        Nothing           -> False
    -- Unsupported characters become dashes; distinct names may collide.
    sanitised = T.map dashIfUnsupported part
    dashIfUnsupported ch
      | isBeancountAccountChar ch = ch
      | otherwise                 = '-'
    errmsg = unlines
      [ T.unpack $ beancountAccountErrorMessage acct
      , "For Beancount output, each account name part must begin with a letter."
      ]
-- | The first line of the error shown when an account name can not be
-- converted for Beancount output. The account name is shown in double quotes.
beancountAccountErrorMessage :: AccountName -> Text
beancountAccountErrorMessage a =
  T.concat
    [ "Could not convert \""
    , a
    , "\" to a Beancount account name."
    ]
-- | Is this character kept as-is when converting an account name for
-- Beancount output ? True for letters, digits, @-@ and @:@.
isBeancountAccountChar :: Char -> Bool
isBeancountAccountChar c
  | isLetter c || isDigit c = True
  | otherwise               = c == '-' || c == ':'
beancountTopLevelAccounts = ["Assets", "Liabilities", "Equity", "Income", "Expenses"] beancountTopLevelAccounts = ["Assets", "Liabilities", "Equity", "Income", "Expenses"]

View File

@ -342,10 +342,9 @@ postingAsLinesBeancount elideamount acctwidth amtwidth p =
render [ textCell BottomLeft statusandaccount render [ textCell BottomLeft statusandaccount
, textCell BottomLeft " " , textCell BottomLeft " "
, Cell BottomLeft [pad amt] , Cell BottomLeft [pad amt]
, Cell BottomLeft [assertion]
, textCell BottomLeft samelinecomment , textCell BottomLeft samelinecomment
] ]
| (amt,assertion) <- shownAmountsAssertions] | (amt,_assertion) <- shownAmountsAssertions]
render = renderRow def{tableBorders=False, borderSpaces=False} . Group NoLine . map Header render = renderRow def{tableBorders=False, borderSpaces=False} . Group NoLine . map Header
pad amt = WideBuilder (TB.fromText $ T.replicate w " ") w <> amt pad amt = WideBuilder (TB.fromText $ T.replicate w " ") w <> amt
where w = max 12 amtwidth - wbWidth amt -- min. 12 for backwards compatibility where w = max 12 amtwidth - wbWidth amt -- min. 12 for backwards compatibility
@ -384,13 +383,17 @@ type BeancountAmount = Amount
-- | Do some best effort adjustments to make an amount that renders -- | Do some best effort adjustments to make an amount that renders
-- in a way that Beancount can read: forces the commodity symbol to the right, -- in a way that Beancount can read: forces the commodity symbol to the right,
-- converts $ to USD. -- converts a few currency symbols to names, capitalises all letters.
amountToBeancount :: Amount -> BeancountAmount amountToBeancount :: Amount -> BeancountAmount
amountToBeancount a@Amount{acommodity=c,astyle=s,aprice=mp} = a{acommodity=c', astyle=s', aprice=mp'} amountToBeancount a@Amount{acommodity=c,astyle=s,aprice=mp} = a{acommodity=c', astyle=s', aprice=mp'}
-- https://beancount.github.io/docs/beancount_language_syntax.html#commodities-currencies -- https://beancount.github.io/docs/beancount_language_syntax.html#commodities-currencies
where where
c' | c=="$" = "USD" c' = T.toUpper $
| otherwise = c T.replace "$" "USD" $
T.replace "€" "EUR" $
T.replace "¥" "JPY" $
T.replace "£" "GBP" $
c
s' = s{ascommodityside=R, ascommodityspaced=True} s' = s{ascommodityside=R, ascommodityspaced=True}
mp' = costToBeancount <$> mp mp' = costToBeancount <$> mp
where where

View File

@ -193,11 +193,11 @@ showTransactionBeancount t =
(payee,note) = (payee,note) =
case payeeAndNoteFromDescription' $ tdescription t of case payeeAndNoteFromDescription' $ tdescription t of
("","") -> ("", "" ) ("","") -> ("", "" )
(p ,"") -> (wrapq p, wrapq "")
("",n ) -> ("" , wrapq n ) ("",n ) -> ("" , wrapq n )
(p ,"") -> (wrapq p, wrapq "")
(p ,n ) -> (wrapq p, wrapq n ) (p ,n ) -> (wrapq p, wrapq n )
where where
wrapq = wrap " \"" "\"" wrapq = wrap " \"" "\"" . escapeDoubleQuotes . escapeBackslash
tags = T.concat $ map ((" #"<>).fst) $ ttags t tags = T.concat $ map ((" #"<>).fst) $ ttags t
(samelinecomment, newlinecomments) = (samelinecomment, newlinecomments) =
case renderCommentLines (tcomment t) of [] -> ("",[]) case renderCommentLines (tcomment t) of [] -> ("",[])

View File

@ -19,6 +19,7 @@ module Hledger.Utils.Text
-- quotechars, -- quotechars,
-- whitespacechars, -- whitespacechars,
escapeDoubleQuotes, escapeDoubleQuotes,
escapeBackslash,
-- escapeSingleQuotes, -- escapeSingleQuotes,
-- escapeQuotes, -- escapeQuotes,
-- words', -- words',
@ -139,6 +140,9 @@ whitespacechars = " \t\n\r"
escapeDoubleQuotes :: T.Text -> T.Text escapeDoubleQuotes :: T.Text -> T.Text
escapeDoubleQuotes = T.replace "\"" "\\\"" escapeDoubleQuotes = T.replace "\"" "\\\""
-- | Double every backslash in the text, leaving all other characters unchanged.
escapeBackslash :: T.Text -> T.Text
escapeBackslash = T.concatMap double
  where
    double '\\' = T.pack "\\\\"
    double ch   = T.singleton ch
-- escapeSingleQuotes :: T.Text -> T.Text -- escapeSingleQuotes :: T.Text -> T.Text
-- escapeSingleQuotes = T.replace "'" "\'" -- escapeSingleQuotes = T.replace "'" "\'"

View File

@ -118,17 +118,26 @@ The output formats supported are
`txt`, `beancount`, `csv`, `tsv`, `json` and `sql`. `txt`, `beancount`, `csv`, `tsv`, `json` and `sql`.
*Experimental:* *Experimental:*
The `beancount` format tries to produce Beancount-compatible output. The `beancount` format tries to produce Beancount-compatible output, as follows:
It is very basic and may require additional manual fixups:
- Transaction and postings with unmarked status are converted to cleared (`*`) status. - Transaction and postings with unmarked status are converted to cleared (`*`) status.
- Transactions' payee and note are wrapped in double quotes. - Transactions' payee and note are backslash-escaped and double-quote-escaped and wrapped in double quotes.
- Transaction tags are copied to Beancount #tag format. - Transaction tags are copied to Beancount #tag format.
- Account name parts are capitalised, and if the first account name part - Commodity symbols are converted to upper case, and a small number of currency symbols
is not one of Assets, Liabilities, Equity, Income, or Expenses, "Equity:" is prepended. like `$` are converted to the corresponding currency names.
- The `$` commodity symbol is converted to `USD`. - Account name parts are capitalised and unsupported characters are replaced with `-`.
If an account name part does not begin with a letter, or if the first part
is not Assets, Liabilities, Equity, Income, or Expenses, an error is raised.
(Use `--alias` options to bring your accounts into compliance.)
- An `open` directive is generated for each account used, on the earliest transaction date. - An `open` directive is generated for each account used, on the earliest transaction date.
Some limitations:
- Balance assertions are removed.
- Balance assignments become missing amounts.
- Virtual and balanced virtual postings become regular postings.
- Directives are not converted.
Here's an example of print's CSV output: Here's an example of print's CSV output:
```shell ```shell