;bin: sortandmergepostings: Overhaul for more robust determinism
* Avoids non-deterministic flip-flopping when the alphabetical account sort has multiple commodities
* Sorts postings' commodities so commodities are in the same order across transactions
* Sorts postings with matching commodity by posting amount
This commit is contained in:
parent
9031612c30
commit
62092749ef
@ -233,12 +233,14 @@ $ watchaccounts -f time.journal client1 date:thismonth -l
|
|||||||
### sortandmergepostings
|
### sortandmergepostings
|
||||||
|
|
||||||
[`sortandmergepostings`](https://github.com/simonmichael/hledger/blob/master/bin/sortandmergepostings)
|
[`sortandmergepostings`](https://github.com/simonmichael/hledger/blob/master/bin/sortandmergepostings)
|
||||||
is an adventuresome awk script intended to clean up and merge similar postings in a transaction
|
is an adventuresome AWK script intended to clean up and merge similar postings in a transaction
|
||||||
(see [original discussion](https://unix.stackexchange.com/questions/526995/re-order-lines-and-merge-others-based-on-a-specific-criteria/527004)).
|
(see [original discussion](https://unix.stackexchange.com/questions/526995/re-order-lines-and-merge-others-based-on-a-specific-criteria/527004)).
|
||||||
It sorts postings so that positive ones are first, negative ones last.
|
It sorts postings so that positive ones are first, negative ones last.
|
||||||
Within each sign, postings are sorted alphabetically by account name.
|
Within each sign, postings are sorted by commodity.
|
||||||
Lastly if there are multiple postings to the same account in the same direction, it tries to merge them (by leaving some amounts blank).
|
Within each commodity group, postings are sorted by amount.
|
||||||
Piping the output to `hledger print` can recalculate the missing amounts.
|
Among identical amounts in the same group, postings are sorted alphabetically by account name.
|
||||||
|
Once sorted, if there are multiple postings to the same account in the same direction with the same commodity and comments, it tries to merge them (by leaving some amounts blank).
|
||||||
|
Subsequently piping the output to `hledger print` can recalculate the missing amounts.
|
||||||
Multiple runs might be needed to clean up all duplicates.
|
Multiple runs might be needed to clean up all duplicates.
|
||||||
```cli
|
```cli
|
||||||
$ sortandmergepostings input.journal | hledger -f - print -x
|
$ sortandmergepostings input.journal | hledger -f - print -x
|
||||||
|
|||||||
@ -3,9 +3,11 @@
|
|||||||
#
|
#
|
||||||
# Passed a ledger file, this will:
|
# Passed a ledger file, this will:
|
||||||
# 1. Sort accretion postings before deductions
|
# 1. Sort accretion postings before deductions
|
||||||
# 2. Sort postings by account alphabetically
|
# 3. Sort posting groups by commodity (descending for accretions, ascending for deductions)
|
||||||
# 3. Merge 1 set of postings with the same account and direction by clearing
|
# 3. Sort commodity groups by amount (descending)
|
||||||
# the amount field. Note all posting meta data must also match to merge.
|
# 2. Sort commodity groups by account name
|
||||||
|
# 3. Merge 1 set of postings with the same account, commodity, direction, and other
|
||||||
|
# meta data by clearing the commodity and amounts and reducing to a single posting.
|
||||||
#
|
#
|
||||||
# Suggested usage:
|
# Suggested usage:
|
||||||
# $ sortandmergepostings journal.ledger | hledger -f - print -x
|
# $ sortandmergepostings journal.ledger | hledger -f - print -x
|
||||||
@ -13,37 +15,120 @@
|
|||||||
# Given that each run will only merge and recalculate amounts on one account per
|
# Given that each run will only merge and recalculate amounts on one account per
|
||||||
# transaction it may need to be run multiple times to fully normalize a ledger.
|
# transaction it may need to be run multiple times to fully normalize a ledger.
|
||||||
|
|
||||||
BEGIN { FS = "[[:space:]][[:space:]]+" }
|
BEGIN {
|
||||||
|
FS = "[[:space:]][[:space:]]+"
|
||||||
|
DATE = "([0-9]{4}-[0-9]{2}-[0-9]{2})"
|
||||||
|
KS = "___"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Return the account field of a posting with a leading status marker
# ("* " or "! ") removed.
#
# val: the raw account field of a posting line.
#
# NOTE(review): the second pattern only matches the literal four-character
# prefix "()[]", so the parentheses/brackets around an account name such as
# "(assets:cash)" are left untouched. Confirm whether stripping them was
# intended before changing it: the returned name is also used to build merge
# keys and to print the merged posting.
function extract_account(val) {
    # Both patterns are anchored at the start, so a single substitution each
    # is all that can ever apply.
    sub(/^[*!] /, "", val)
    sub(/^\(\)\[\]/, "", val)
    return val
}
|
||||||
|
|
||||||
|
# Return the commodity symbol of a posting's amount field.
#
# val:  the raw amount field, e.g. "-5.00 USD @ 1.10 EUR".
# segs: local scratch array (callers pass only val).
#
# Everything from the first "@" or "=" onwards is ignored; from what remains,
# digits, signs ("-"), ".", "," and spaces are removed. An empty val yields "".
function extract_commodity(val,    segs) {
    split(val, segs, / *[@=]+ */)
    gsub(/[-[:digit:]., ]+/, "", segs[1])
    return segs[1]
}
|
||||||
|
|
||||||
|
# Return the numeric quantity of a posting's amount field as a plain decimal
# string (optional "-", digits, at most one "." as the decimal mark).
#
# val:     the raw amount field, e.g. "1,000.50 USD @ 2 EUR".
# segs, v: local scratch variables (callers pass only val).
#
# Only the part before the first "@" or "=" is considered. Every "." and ","
# is first turned into "_"; the last one becomes the decimal point and the
# others (digit-group marks) are dropped. Consequently a quantity containing a
# single separator is always read as a decimal ("1,000" -> "1.000").
function extract_amount(val,    segs, v) {
    split(val, segs, / *[@=]+ */)
    gsub(/[^-[:digit:].,]+/, "", segs[1])
    gsub(/[.,]/, "_", segs[1])
    v = gensub(/_([^_]+)$/, ".\\1", "1", segs[1])
    gsub(/_/, "", v)
    return v
}
|
||||||
|
|
||||||
|
# Return the sign marker of a posting's amount field: a string made of every
# "-" found before the first "=" (empty when there is none).
#
# val:  the raw amount field, e.g. "-5 USD = -10 USD".
# segs: local scratch array (callers pass only val).
#
# Text after "=" is ignored, so a "-" there does not count. Any "-" before it
# (including one after "@") does count.
function extract_direction(val,    segs) {
    split(val, segs, / *=+ */)
    gsub(/[^-]/, "", segs[1])
    return segs[1]
}
|
||||||
|
|
||||||
|
# Return the date that applies to a posting.
#
# val:  the posting's comment text, searched for a "date:" or "date2:" tag
#       followed by a date matching the global DATE pattern.
# tday: local array receiving the capture groups (callers pass only val).
#
# Returns the tagged date when one is found, otherwise the global
# transaction_date.
function extract_date(val,    tday) {
    # Search the text that was passed in; "$val" would treat the comment as a
    # field number and end up scanning the whole current record instead.
    if (match(val, "date2?: *" DATE, tday))
        return tday[1]
    return transaction_date
}
|
||||||
|
|
||||||
|
# Build the key under which postings are counted as merge candidates.
#
# account, commodity, comment: the three values that must all match.
#
# The parts are joined with SUBSEP so that different triples cannot produce
# the same key (plain concatenation would make ("ab", "c") and ("a", "bc")
# collide).
function make_posting_key(account, commodity, comment) {
    return account SUBSEP commodity SUBSEP comment
}
|
||||||
|
|
||||||
|
# Build the string used as the array index (and therefore the sort key) of a
# posting.
#
# The fields are joined with the global separator KS in a fixed order:
#   1 date, 2 account, 3 commodity, 4 firstamount, 5 postingct
# Note that this differs from the parameter order. The order is spelled out
# explicitly because "for (i in array)" visits elements in an unspecified
# order, and sort_keys relies on the positions of fields 3 and 4.
function make_sort_key(date, account, firstamount, commodity, postingct) {
    return date KS account KS commodity KS firstamount KS postingct
}
|
||||||
|
|
||||||
|
# Comparison function for asorti(): orders two sort keys.
#
# i1, i2: the keys being compared (strings of fields joined by the global KS).
# v1, v2: the corresponding array values (unused).
# l, r:   local arrays holding the fields of i1 and i2.
# n, i:   local field count and loop index.
#
# Returns a negative value when i1 sorts first, positive when i2 sorts first,
# and 0 when every field is equal.
#
# Fields are compared left to right and the first differing field decides:
#   - field 3: ascending when field 4 of i1 is greater than 0, descending
#     otherwise;
#   - field 4: descending;
#   - any other field: ascending.
# NOTE(review): this presumes the layout date, account, commodity, amount,
# counter for the key fields — confirm against the code that builds the keys.
# A field value that itself contains KS would shift the positions.
function sort_keys(i1, v1, i2, v2, l, r,    n, i) {
    n = split(i1, l, KS)
    split(i2, r, KS)
    # Walk the fields by index: "for (i in l)" has no guaranteed order, which
    # would make the precedence of the fields unpredictable.
    for (i = 1; i <= n; i++) {
        if (l[i] == r[i])
            continue
        if (i == 3) {
            if (l[4] > 0)
                return l[i] > r[i] ? 1 : -1
            return l[i] < r[i] ? 1 : -1
        }
        if (i == 4)
            return l[i] < r[i] ? 1 : -1
        return l[i] > r[i] ? 1 : -1
    }
    return 0
}
|
||||||
|
|
||||||
function dump() {
|
function dump() {
|
||||||
an = asorti(accretions, as)
|
an = asorti(accretions, as, "sort_keys")
|
||||||
dn = asorti(deductions, ds)
|
dn = asorti(deductions, ds, "sort_keys")
|
||||||
for (i=1; i<=an; i++) {
|
for (i=1; i<=an; i++) {
|
||||||
postings[length(postings)+1] = accretions[as[i]]
|
postings[length(postings)+1] = accretions[as[i]]
|
||||||
}
|
}
|
||||||
for (i=1; i<=dn; i++) {
|
for (i=1; i<=dn; i++) {
|
||||||
postings[length(postings)+1] = deductions[ds[i]]
|
postings[length(postings)+1] = deductions[ds[i]]
|
||||||
}
|
}
|
||||||
|
if (inferred_posting) delete seen
|
||||||
for (i in postings) {
|
for (i in postings) {
|
||||||
posting = postings[i]
|
posting = postings[i]
|
||||||
split(posting, parts, FS)
|
split(posting, parts, FS)
|
||||||
currency = parts[3]
|
account = extract_account(parts[2])
|
||||||
gsub(/[[:digit:]., ]+/, "", currency)
|
commodity = extract_commodity(parts[3])
|
||||||
if (!inferred && (!merge || merge == parts[2]) && seen[parts[2] currency parts[4]]>1 && parts[3] !~ /@/) {
|
comment = parts[4]
|
||||||
if (!merge) merged[i] = " " parts[2] " " parts[4]
|
posting_key = make_posting_key(account, commodity, comment)
|
||||||
merge = parts[2]
|
as_inferred = " " account " " comment
|
||||||
|
if (seen[posting_key] < 2 || (inferred_commodity && inferred_commodity != commodity)) {
|
||||||
|
explicit_postings[i] = posting
|
||||||
} else {
|
} else {
|
||||||
merged[i] = posting
|
if (!inferred_posting || as_inferred == inferred_posting) {
|
||||||
|
inferred_posting = as_inferred
|
||||||
|
inferred_commodity = commodity
|
||||||
|
} else {
|
||||||
|
explicit_postings[i] = posting
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (i in merged) print merged[i]
|
for (i in explicit_postings) print explicit_postings[i]
|
||||||
if (inferred) print inferred
|
if (inferred_posting) print inferred_posting
|
||||||
inferred = ""
|
inferred_posting = ""
|
||||||
merge = ""
|
merged_key = ""
|
||||||
delete accretions
|
delete accretions
|
||||||
delete deductions
|
delete deductions
|
||||||
delete postings
|
delete postings
|
||||||
delete merged
|
delete explicit_postings
|
||||||
delete seen
|
delete seen
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -59,38 +144,47 @@ END {
|
|||||||
|
|
||||||
/^[^[:space:]]/ {
|
/^[^[:space:]]/ {
|
||||||
dump()
|
dump()
|
||||||
|
if (match($0, "^" DATE, tday)) {
|
||||||
|
transaction_date = tday[1]
|
||||||
|
}
|
||||||
print $0
|
print $0
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
postingct++
|
postingct++
|
||||||
account = $2
|
posting = $0
|
||||||
|
account = extract_account($2)
|
||||||
|
commodity = extract_commodity($3)
|
||||||
amount = $3
|
amount = $3
|
||||||
comments = $4
|
firstamount = extract_amount($3)
|
||||||
currency = amount
|
direction = extract_direction($3)
|
||||||
gsub(/[[:digit:]., ]+/, "", currency)
|
comment = $4
|
||||||
sub(/^[*!] /, "", account)
|
date = extract_date($4)
|
||||||
|
sort_key = make_sort_key(date, account, firstamount, commodity, postingct)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Immediately output transaction comments at the top
|
||||||
account ~ /^;/ {
|
account ~ /^;/ {
|
||||||
print
|
print
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# If amount is blank, this is our one and only allowed inferred amount
|
||||||
!amount {
|
!amount {
|
||||||
inferred = $0
|
inferred_posting = posting
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
|
|
||||||
amount !~ /@/ {
|
# If no rates or balance assertions, this is eligible for merging
|
||||||
seen[account currency comments]++
|
amount !~ /[@=]/ {
|
||||||
|
seen[make_posting_key(account, commodity, comment)]++
|
||||||
}
|
}
|
||||||
|
|
||||||
amount !~ /-/ {
|
direction !~ /-/ {
|
||||||
accretions[account postingct] = $0
|
accretions[sort_key] = posting
|
||||||
}
|
}
|
||||||
|
|
||||||
amount ~ /-/ {
|
direction ~ /-/ {
|
||||||
deductions[account postingct] = $0
|
deductions[sort_key] = posting
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user