* Avoids non-deterministic flip-flopping when the alphabetical account sort has multiple commodities * Sorts postings commodities so commodities are in the same order across transactions * Sorts postings with matching commodity by posting amount
191 lines
4.7 KiB
Awk
Executable File
191 lines
4.7 KiB
Awk
Executable File
#!/usr/bin/awk -f
|
|
# Script adapted from suggestions on https://unix.stackexchange.com/a/527004/1925
|
|
#
|
|
# Passed a ledger file, this will:
|
|
# 1. Sort accretion postings before deductions
# 2. Sort posting groups by commodity (descending for accretions, ascending for deductions)
# 3. Sort commodity groups by amount (descending)
# 4. Sort commodity groups by account name
# 5. Merge one set of postings with the same account, commodity, direction, and other
#    meta data by clearing the commodity and amounts and reducing to a single posting.
|
|
#
|
|
# Suggested usage:
|
|
# $ sortandmergepostings journal.ledger | hledger -f - print -x
|
|
#
|
|
# Given that each run will only merge and recalculate amounts on one account per
|
|
# transaction it may need to be run multiple times to fully normalize a ledger.
|
|
|
|
# One-time setup of the separators and patterns shared by the rest of the script.
BEGIN {
	# Marker placed between the components of a sort key.
	KS = "___"
	# Date pattern (YYYY-MM-DD); parenthesised so match() can capture it.
	DATE = "([0-9]{4}-[0-9]{2}-[0-9]{2})"
	# Input fields are separated by runs of two or more whitespace characters.
	FS = "[[:space:]][[:space:]]+"
}
|
|
|
|
# Return the account field of a posting with any leading status mark removed.
#   val: raw account field text
function extract_account(val) {
	# Strip a leading "* " or "! " status mark.
	sub(/^[*!] /, "", val)
	# NOTE(review): as written this only removes the literal text "()[]" when
	# it appears at the very start of the value; parentheses or brackets that
	# wrap an account name are left in place. Confirm whether that is intended.
	sub(/^\(\)\[\]/, "", val)
	return val
}
|
|
|
|
# Return the commodity symbol of an amount field.
#   val:  raw amount field text
#   segs: local scratch array (do not pass)
# Only the text before the first "@" or "=" is considered. Digits, "-", ".",
# "," and spaces are removed from it and whatever remains is returned.
function extract_commodity(val,    segs) {
	split(val, segs, / *[@=]+ */)
	gsub(/[-[:digit:]., ]+/, "", segs[1])
	return segs[1]
}
|
|
|
|
# Return the numeric part of an amount field as a plain decimal string.
#   val:          raw amount field text
#   segs, num, v: local scratch variables (do not pass)
# Only the text before the first "@" or "=" is considered. Everything except
# digits, "-", "." and "," is dropped; the last "." or "," is then treated as
# the decimal point and any earlier ones are removed as grouping marks.
function extract_amount(val,    segs, num, v) {
	split(val, segs, / *[@=]+ */)
	num = segs[1]
	gsub(/[^-[:digit:].,]+/, "", num)
	# Turn every separator into "_" so the last one can be singled out.
	gsub(/[.,]/, "_", num)
	# The last separator becomes the decimal point ...
	v = gensub(/_([^_]+)$/, ".\\1", "1", num)
	# ... and the remaining ones are discarded.
	gsub(/_/, "", v)
	return v
}
|
|
|
|
# Return the minus signs found in an amount field.
#   val:  raw amount field text
#   segs: local scratch array (do not pass)
# Only the text before the first "=" is considered. The result is empty when
# that text contains no "-"; callers test it with ~ /-/.
function extract_direction(val,    segs) {
	split(val, segs, / *=+ */)
	gsub(/[^-]/, "", segs[1])
	return segs[1]
}
|
|
|
|
# Return the date that applies to a posting.
#   val:  the posting's comment text
#   tday: local scratch array for the match() captures (do not pass)
# If val contains a "date:" or "date2:" tag followed by a date matching DATE,
# that date is returned; otherwise the current transaction_date is returned.
function extract_date(val,    tday) {
	# Match the text that was passed in. The previous "$val" used the comment
	# as a field number, which only worked because a non-numeric string
	# resolves to $0.
	if (match(val, "date2?: *" DATE, tday))
		return tday[1]
	return transaction_date
}
|
|
|
|
# Build the key used to count postings that are candidates for merging.
#   account, commodity, comment: the three parts that must all match
# The parts are joined with SUBSEP so that different triples cannot collapse
# into the same key (plain concatenation made "ab"+"c" equal to "a"+"bc").
function make_posting_key(account, commodity, comment) {
	return account SUBSEP commodity SUBSEP comment
}
|
|
|
|
# Build the index string under which a posting is stored for sorting.
#   date, account, firstamount, commodity, postingct: key components
# The components are joined with KS in a fixed order:
#   1 date, 2 account, 3 commodity, 4 firstamount, 5 postingct
# sort_keys() addresses positions 3 and 4 by number, so the order must be
# fixed. It is written out explicitly because the order of "for (i in array)"
# is not defined.
function make_sort_key(date, account, firstamount, commodity, postingct) {
	return date KS account KS commodity KS firstamount KS postingct
}
|
|
|
|
# Comparison function for asorti(): orders two sort-key index strings.
#   i1, i2: the index strings being compared (KS-separated components)
#   v1, v2: the corresponding values (unused)
#   l, r:   unused, kept for compatibility
#   a, b, n, i: local scratch variables (do not pass)
# Returns 1 or -1 at the first component that differs, 0 if none differ.
# Components are compared in position order 1..n; the loop is numeric because
# the order of "for (i in array)" is not defined.
function sort_keys(i1, v1, i2, v2, l, r,    a, b, n, i) {
	n = split(i1, a, KS)
	split(i2, b, KS)
	for (i = 1; i <= n; i++) {
		if (a[i] == b[i])
			continue
		if (i == 3) {
			# Position 3: ascending when position 4 of the first key is
			# greater than zero, descending otherwise.
			if (a[4] > 0)
				return a[i] > b[i] ? 1 : -1
			return a[i] < b[i] ? 1 : -1
		}
		if (i == 4) {
			# Position 4: larger values first.
			return a[i] < b[i] ? 1 : -1
		}
		# Every other position: ascending.
		return a[i] > b[i] ? 1 : -1
	}
	return 0
}
|
|
|
|
# Print the postings collected for the current transaction and reset state.
#
# Reads the globals accretions, deductions, seen and inferred_posting that the
# posting rules fill in. Accretions are printed before deductions, each group
# ordered by the sort_keys comparator. Postings whose key was counted at
# least twice in seen are folded into one amountless posting, which is
# printed last.
#
# All parameters are local scratch variables; call as dump().
function dump(    an, dn, as, ds, n, i, postings, explicit, parts, posting, account, commodity, comment, as_inferred) {
	an = asorti(accretions, as, "sort_keys")
	dn = asorti(deductions, ds, "sort_keys")

	# Number the postings 1..n in output order.
	n = 0
	for (i = 1; i <= an; i++)
		postings[++n] = accretions[as[i]]
	for (i = 1; i <= dn; i++)
		postings[++n] = deductions[ds[i]]

	# The transaction already has an amountless posting, so nothing else may
	# be merged: forget all merge candidates.
	if (inferred_posting) delete seen

	# Walk the postings in order. A numeric loop is used because the order of
	# "for (i in array)" is not defined and would discard the sort.
	for (i = 1; i <= n; i++) {
		posting = postings[i]
		split(posting, parts, FS)
		account = extract_account(parts[2])
		commodity = extract_commodity(parts[3])
		comment = parts[4]
		as_inferred = " " account " " comment
		if (seen[make_posting_key(account, commodity, comment)] < 2 || (inferred_commodity && inferred_commodity != commodity)) {
			# Not a duplicate, or a different commodity from the one being merged.
			explicit[i] = posting
		} else if (!inferred_posting || as_inferred == inferred_posting) {
			# First merge candidate, or another member of the set being merged.
			inferred_posting = as_inferred
			inferred_commodity = commodity
		} else {
			# A second set of duplicates: only one set is merged per run.
			explicit[i] = posting
		}
	}

	for (i = 1; i <= n; i++)
		if (i in explicit) print explicit[i]
	if (inferred_posting) print inferred_posting

	# Reset per-transaction state. inferred_commodity must be cleared too,
	# otherwise it would block merges in later transactions that use a
	# different commodity.
	inferred_posting = ""
	inferred_commodity = ""
	delete accretions
	delete deductions
	delete seen
}
|
|
|
|
# A record with no fields ends the current transaction: flush the collected
# postings, then pass the record through unchanged.
NF == 0 {
	dump()
	print $0
	next
}
|
|
|
|
# At end of input, flush whatever postings are still collected.
END { dump() }
|
|
|
|
# A line that does not start with whitespace: flush the postings collected so
# far, remember its leading date (if it has one) as the transaction date, and
# pass the line through unchanged.
/^[^[:space:]]/ {
	dump()
	if (match($0, "^" DATE, tday))
		transaction_date = tday[1]
	print
	next
}
|
|
|
|
# Every remaining line is split into its parts for the rules that follow:
# $2 is read as the account, $3 as the amount and $4 as the comment.
{
	posting = $0
	postingct += 1

	account = extract_account($2)
	amount = $3
	comment = $4

	commodity = extract_commodity($3)
	firstamount = extract_amount($3)
	direction = extract_direction($3)
	date = extract_date($4)

	# postingct differs for every line, so it makes each key unique.
	sort_key = make_sort_key(date, account, firstamount, commodity, postingct)
}
|
|
|
|
# Comment lines (account part starting with ";") are not sorted; they are
# printed immediately, so they appear above the transaction's postings.
account ~ /^;/ {
	print $0
	next
}
|
|
|
|
# If amount is blank, this is our one and only allowed inferred amount.
# The test is an explicit comparison with the empty string: a bare "0" amount
# is a numeric-looking field, so "!amount" would be true for it and the
# posting would be treated as amountless.
amount == "" {
	inferred_posting = posting
	next
}
|
|
|
|
# If no rates or balance assertions, this is eligible for merging: count how
# many postings share the same account, commodity and comment.
amount !~ /[@=]/ {
	seen[make_posting_key(account, commodity, comment)] += 1
}
|
|
|
|
# File the posting under its sort key: amounts containing a minus sign go to
# deductions, everything else to accretions.
{
	if (direction ~ /-/)
		deductions[sort_key] = posting
	else
		accretions[sort_key] = posting
}
|