From aff3cd05914db9df1a27560d4688de14a50fc9c4 Mon Sep 17 00:00:00 2001 From: Simon Michael Date: Fri, 11 Oct 2024 14:49:21 -1000 Subject: [PATCH] imp:print:beancount: more robust conversion of commodity names --- hledger-lib/Hledger/Write/Beancount.hs | 79 +++++++++++++++++++++----- hledger/hledger.m4.md | 51 ++++++++++------- 2 files changed, 98 insertions(+), 32 deletions(-) diff --git a/hledger-lib/Hledger/Write/Beancount.hs b/hledger-lib/Hledger/Write/Beancount.hs index 3a1b41454..1efa8570c 100644 --- a/hledger-lib/Hledger/Write/Beancount.hs +++ b/hledger-lib/Hledger/Write/Beancount.hs @@ -32,9 +32,11 @@ import Hledger.Utils import Hledger.Data.Types import Hledger.Data.AccountName import Hledger.Data.Amount +import Hledger.Data.Currency (currencySymbolToCode) import Hledger.Data.Dates (showDate) import Hledger.Data.Posting (renderCommentLines, showBalanceAssertion, postingIndent) import Hledger.Data.Transaction (payeeAndNoteFromDescription') +import Data.Function ((&)) --- ** doctest setup -- $setup @@ -135,10 +137,10 @@ type BeancountAccountName = AccountName type BeancountAccountNameComponent = AccountName -- | Convert a hledger account name to a valid Beancount account name. --- It replaces non-supported characters with @-@ (warning: in extreme cases --- separate accounts could end up with the same name), it prepends the letter B --- to any part which doesn't begin with a letter or number, and it capitalises --- each part. It also checks that the first part is one of the required english +-- It replaces non-supported characters with a dash, it prepends the letter B +-- to any part which doesn't begin with a letter or number, and it capitalises each part. +-- It's possible this could generate the same beancount name for distinct hledger account names. +-- It also checks that the first part is one of the required english -- account names Assets, Liabilities, Equity, Income, or Expenses, and if not -- it raises an informative error suggesting --alias. 
-- Ref: https://beancount.github.io/docs/beancount_language_syntax.html#accounts @@ -198,24 +200,75 @@ beancountTopLevelAccounts = ["Assets", "Liabilities", "Equity", "Income", "Expen type BeancountAmount = Amount -- | Do some best effort adjustments to make an amount that renders --- in a way that Beancount can read: forces the commodity symbol to the right, --- converts a few currency symbols to names, capitalises all letters. +-- in a way that Beancount can read: force the commodity symbol to the right, +-- capitalise all letters, convert a few currency symbols to codes. amountToBeancount :: Amount -> BeancountAmount amountToBeancount a@Amount{acommodity=c,astyle=s,acost=mp} = a{acommodity=c', astyle=s', acost=mp'} - -- https://beancount.github.io/docs/beancount_language_syntax.html#commodities-currencies where - c' = T.toUpper $ - T.replace "$" "USD" $ - T.replace "€" "EUR" $ - T.replace "¥" "JPY" $ - T.replace "£" "GBP" $ - c + c' = commodityToBeancount c s' = s{ascommodityside=R, ascommodityspaced=True} mp' = costToBeancount <$> mp where costToBeancount (TotalCost amt) = TotalCost $ amountToBeancount amt costToBeancount (UnitCost amt) = UnitCost $ amountToBeancount amt +type BeancountCommoditySymbol = CommoditySymbol + +-- | Convert a hledger commodity name to a valid Beancount commodity name. +-- That is: 2-24 uppercase letters / digits / apostrophe / period / underscore / dash, +-- starting with a letter, and ending with a letter or digit. +-- Ref: https://beancount.github.io/docs/beancount_language_syntax.html#commodities-currencies +-- So this: removes any enclosing double quotes, +-- replaces some common currency symbols with currency codes, +-- capitalises all letters, +-- replaces any invalid characters with a dash (-), +-- prepends a B if the first character is not a letter, +-- and appends a B if the last character is not a letter or digit. 
+-- It's possible this could generate unreadable commodity names, +-- or the same beancount name for distinct hledger commodity names. +-- +-- >>> commodityToBeancount "" +-- "B" +-- >>> commodityToBeancount "$" +-- "USD" +-- >>> commodityToBeancount "Usd" +-- "USD" +-- >>> commodityToBeancount "\"a1\"" +-- "A1" +-- >>> commodityToBeancount "\"A 1!\"" +-- "A-1-B" +-- +commodityToBeancount :: CommoditySymbol -> BeancountCommoditySymbol +commodityToBeancount com = + dbg9 "beancount commodity name" $ + let com' = stripquotes com + in case currencySymbolToCode com' of + Just code -> code + Nothing -> + com' + & T.toUpper + & T.map (\d -> if isBeancountCommodityChar d then d else '-') + & fixstart + & fixend + where + fixstart bcom = case T.uncons bcom of + Just (c,_) | isBeancountCommodityStartChar c -> bcom + _ -> "B" <> bcom + fixend bcom = case T.unsnoc bcom of + Just (_,c) | isBeancountCommodityEndChar c -> bcom + _ -> bcom <> "B" + +-- | Is this a valid character in the middle of a Beancount commodity name (a capital letter, digit, or '._-) ? +isBeancountCommodityChar :: Char -> Bool +isBeancountCommodityChar c = (isLetter c && isUpperCase c) || isDigit c || c `elem` ['\'', '.', '_', '-'] + +-- | Is this a valid character to start a Beancount commodity name (a capital letter) ? +isBeancountCommodityStartChar :: Char -> Bool +isBeancountCommodityStartChar c = isLetter c && isUpperCase c + +-- | Is this a valid character to end a Beancount commodity name (a capital letter or digit) ? +isBeancountCommodityEndChar :: Char -> Bool +isBeancountCommodityEndChar c = (isLetter c && isUpperCase c) || isDigit c --- ** tests diff --git a/hledger/hledger.m4.md b/hledger/hledger.m4.md index f973ad5a5..6dac5907c 100644 --- a/hledger/hledger.m4.md +++ b/hledger/hledger.m4.md @@ -694,34 +694,42 @@ It is a good format to use if you are exporting to their spreadsheet app. This is [Beancount's journal format][beancount journal]. 
You can use this to export your hledger data to [Beancount], perhaps to query it with [Beancount Query Language] or with the [Fava] web app. +hledger will try to adjust your data to suit Beancount. +If you plan to export often, you may want to follow Beancount's conventions in your hledger data, +to ease conversion. Eg use Beancount-friendly account names, currency codes instead of currency symbols, +and avoid virtual postings, redundant cost notation, etc. + +Here are more details, included here for now +(see also "hledger and Beancount" <https://hledger.org/beancount.html>). #### Beancount account names -hledger will try adjust your account names to the more restricted -[Beancount account names](https://beancount.github.io/docs/beancount_language_syntax.html#accounts), by -capitalising, replacing unsupported characters with `-`, and/or +hledger will try to adjust your account names, if needed, to +[Beancount account names](https://beancount.github.io/docs/beancount_language_syntax.html#accounts), +by capitalising, replacing unsupported characters with `-`, and prepending `B` to parts which don't begin with a letter or digit. +(It's possible for this to convert distinct hledger account names to the same beancount name. +Eg, hledger's automatic equity conversion accounts can have currency symbols in their name, +so `equity:conversion:$-€` becomes `equity:conversion:B---`.) -But you must ensure that the top level account names are `Assets`, `Liabilities`, `Equity`, `Income`, and `Expenses`. -If yours are not, you can use [account aliases](#alias-directive), usually in the form of `--alias` options, -possibly stored in a [config file](#config-file). -(Example: [hledger2beancount.conf](https://github.com/simonmichael/hledger/blob/master/examples/hledger2beancount.conf)) +In addition, you must ensure that the top level account names are `Assets`, `Liabilities`, `Equity`, `Income`, and `Expenses`, +which Beancount requires. 
+If yours are named differently, you can use [account aliases](#alias-directive), +usually in the form of `--alias` options, possibly stored in a [config file](#config-file). +(An example: [hledger2beancount.conf](https://github.com/simonmichael/hledger/blob/master/examples/hledger2beancount.conf)) #### Beancount commodity names -[Beancount commodity/currency names](https://beancount.github.io/docs/beancount_language_syntax.html#commodities-currencies) -also are more restricted: they must be 2-24 uppercase letters, digits, or `'`, `.`, `_`, `-`, +hledger will adjust your commodity names, if needed, to +[Beancount commodity/currency names](https://beancount.github.io/docs/beancount_language_syntax.html#commodities-currencies), +which must be 2-24 uppercase letters, digits, or `'`, `.`, `_`, `-`, beginning with a letter and ending with a letter or digit. - -Currently hledger helps only a little with this: -if you are using currency symbols `$`, `€`, `£` or `¥`, these will be converted -to the equivalent [ISO 4217](https://en.wikipedia.org/wiki/ISO_4217#Active_codes) currency codes. - -Other symbols, or other commodity names not valid for Beancount, must be adjusted by you, -either permanently in your journal, or in a temporary copy used just for export. - -Amounts with no currency symbol will also not work for Beancount. -If you want to keep using those, the [`D` directive](#d-directive) is one way to add a temporary commodity symbol. +hledger will convert known currency symbols to [ISO 4217 currency codes](https://en.wikipedia.org/wiki/ISO_4217#Active_codes). +Otherwise, it will capitalise letters, +replace unsupported characters with a dash (-), +and prepend/append a "B" when needed. +(It's possible for this to generate unreadable commodity names, +or to convert distinct hledger commodity names to the same beancount name.) #### Beancount virtual postings @@ -730,6 +738,11 @@ so you will need to comment those, or use `--real` to exclude transactions that use them. 
(If you have transactions which are a mixture of balanced and unbalanced postings, you'll have to do something more.) +#### Beancount costs + +Beancount doesn't allow [redundant cost notation](https://hledger.org/hledger.html#combining-costs-and-equity-conversion-postings) +as hledger does. If you have entries like this, you may need to comment out either the costs or the equity postings. + [Beancount]: https://beancount.github.io [beancount journal]: https://beancount.github.io/docs/beancount_language_syntax.html [Beancount Query Language]: https://beancount.github.io/docs/beancount_query_language.html