#!/usr/bin/awk -f

# Script adapted from suggestions on https://unix.stackexchange.com/a/527004/1925
#
# Requires GNU awk: it relies on gensub(), asorti() with a comparison
# function, and the three-argument form of match().
#
# Passed a ledger file, this will, for each transaction:
# 1. Sort accretion postings (no "-" in the amount) before deductions
# 2. Within each of those groups, sort postings by date, then account name,
#    then commodity (ascending for positive amounts, descending otherwise),
#    then amount (descending), then original position in the file
# 3. Merge 1 set of postings with the same account, commodity, and comment
#    by clearing the commodity and amounts and reducing to a single posting.
#
# Suggested usage:
#   $ sortandmergepostings journal.ledger | hledger -f - print -x
#
# Given that each run will only merge and recalculate amounts on one account per
# transaction it may need to be run multiple times to fully normalize a ledger.

BEGIN {
    # Posting fields (account, amount, comment) are separated by 2+ whitespace
    FS = "[[:space:]][[:space:]]+"
    DATE = "([0-9]{4}-[0-9]{2}-[0-9]{2})"
    # Separator used inside sort keys and posting keys
    KS = "___"
}

# Return the account field with a leading status marker ("* " or "! ") removed.
function extract_account(val) {
    sub(/^[*!] /, "", val)
    # NOTE(review): this pattern only matches the literal text "()[]" at the
    # start of the value, so it is effectively a no-op. A character class was
    # probably intended, but stripping brackets would also change the account
    # text printed for merged postings, so it is left untouched here.
    gsub(/^\(\)\[\]/, "", val)
    return val
}

# Return the commodity of an amount field: the part before any "@"/"="
# with digits, signs, separators and spaces removed.
function extract_commodity(val,    segs) {
    split(val, segs, / *[@=]+ */)
    gsub(/[-[:digit:]., ]+/, "", segs[1])
    return segs[1]
}

# Return the numeric part of an amount field (before any "@"/"=").
# The last "." or "," is taken as the decimal mark; earlier ones are dropped.
function extract_amount(val,    segs, v) {
    split(val, segs, / *[@=]+ */)
    gsub(/[^-[:digit:].,]+/, "", segs[1])
    gsub(/[.,]/, "_", segs[1])
    v = gensub(/_([^_]+)$/, ".\\1", "1", segs[1])
    gsub(/_/, "", v)
    return v
}

# Return the "-" characters found before any "=" in an amount field;
# an empty result means the posting is treated as an accretion.
function extract_direction(val,    segs) {
    split(val, segs, / *=+ */)
    gsub(/[^-]/, "", segs[1])
    return segs[1]
}

# Return the posting date: a "date:"/"date2:" tag found on the current record,
# otherwise the date of the enclosing transaction.
# The original code indexed fields with the comment text ($val), which awk
# evaluates as $0 for non-numeric text; the whole record is searched
# explicitly here so the lookup can never address an unrelated or negative
# field number. val is kept for interface compatibility.
function extract_date(val,    tday, posting_date) {
    posting_date = transaction_date
    if (match($0, "date2?: *" DATE, tday)) {
        posting_date = tday[1]
    }
    return posting_date
}

# Build the key that identifies postings eligible to be merged together.
# Fields are joined with KS so that different (account, commodity, comment)
# triples cannot collapse into the same string.
function make_posting_key(account, commodity, comment) {
    return account KS commodity KS comment
}

# Build the sort key: date, account, commodity, amount, position, in that
# fixed order. sort_keys() depends on these positions (3 = commodity,
# 4 = amount), so the key is concatenated explicitly rather than by walking
# an array with "for (i in ...)", whose traversal order is unspecified.
function make_sort_key(date, account, firstamount, commodity, postingct) {
    return date KS account KS commodity KS firstamount KS postingct
}

# Comparison function for asorti(): compares two sort keys field by field in
# key order. Commodity sorts ascending when the first key's amount is
# positive and descending otherwise; amount sorts descending; every other
# field sorts ascending. l and r are unused but kept for compatibility.
function sort_keys(i1, v1, i2, v2, l, r,    a, b, i, n) {
    n = split(i1, a, KS)
    split(i2, b, KS)
    for (i = 1; i <= n; i++) {
        if (a[i] == b[i])
            continue
        if (i == 3) {
            if (a[4] > 0) {
                return a[i] > b[i] ? 1 : -1
            } else {
                return a[i] < b[i] ? 1 : -1
            }
        } else if (i == 4) {
            return a[i] < b[i] ? 1 : -1
        } else {
            return a[i] > b[i] ? 1 : -1
        }
    }
    return 0
}

# Print the postings collected for the current transaction (sorted, with at
# most one set of duplicates merged into an amount-less posting) and reset
# all per-transaction state.
function dump(    acc_keys, ded_keys, acc_count, ded_count, postings, explicit_postings, total, i, posting, parts, account, commodity, comment, posting_key, as_inferred) {
    # Make sure the destination variables are typed as (empty) arrays
    split("", acc_keys)
    split("", ded_keys)
    acc_count = asorti(accretions, acc_keys, "sort_keys")
    ded_count = asorti(deductions, ded_keys, "sort_keys")

    # Accretions first, then deductions, each in sorted order
    total = 0
    for (i = 1; i <= acc_count; i++) {
        postings[++total] = accretions[acc_keys[i]]
    }
    for (i = 1; i <= ded_count; i++) {
        postings[++total] = deductions[ded_keys[i]]
    }

    # The transaction already has an amount-less posting: nothing may be merged
    if (inferred_posting)
        delete seen

    # Walk by index so the sorted order is preserved
    for (i = 1; i <= total; i++) {
        posting = postings[i]
        split(posting, parts, FS)
        account = extract_account(parts[2])
        commodity = extract_commodity(parts[3])
        comment = parts[4]
        posting_key = make_posting_key(account, commodity, comment)
        # Separate with 4 spaces so the output can be parsed again with FS
        as_inferred = "    " account (comment != "" ? "    " comment : "")
        if (seen[posting_key] < 2 || (inferred_commodity && inferred_commodity != commodity)) {
            explicit_postings[i] = posting
        } else if (!inferred_posting || as_inferred == inferred_posting) {
            # First duplicated set found (or another member of it): merge
            inferred_posting = as_inferred
            inferred_commodity = commodity
        } else {
            explicit_postings[i] = posting
        }
    }

    for (i = 1; i <= total; i++) {
        if (i in explicit_postings)
            print explicit_postings[i]
    }
    if (inferred_posting)
        print inferred_posting

    # Reset per-transaction state; inferred_commodity must be cleared too,
    # otherwise it would block merges of other commodities in later transactions
    inferred_posting = ""
    inferred_commodity = ""
    delete accretions
    delete deductions
    delete seen
}

# A blank line ends the current transaction
!NF {
    dump()
    print
    next
}

END {
    dump()
}

# A line starting in the first column flushes the pending postings and is
# passed through unchanged; a leading date becomes the transaction date
/^[^[:space:]]/ {
    dump()
    if (match($0, "^" DATE, tday)) {
        transaction_date = tday[1]
    }
    print $0
    next
}

# Every remaining (indented) line: split it into posting properties
{
    postingct++
    posting = $0
    account = extract_account($2)
    commodity = extract_commodity($3)
    amount = $3
    firstamount = extract_amount($3)
    direction = extract_direction($3)
    comment = $4
    date = extract_date($4)
    sort_key = make_sort_key(date, account, firstamount, commodity, postingct)
}

# Immediately output transaction comments at the top
account ~ /^;/ {
    print
    next
}

# If amount is blank, this is our one and only allowed inferred amount
# (compared as a string so that a literal amount of 0 is not mistaken for blank)
amount == "" {
    inferred_posting = posting
    next
}

# If no rates or balance assertions, this is eligible for merging
amount !~ /[@=]/ {
    seen[make_posting_key(account, commodity, comment)]++
}

direction !~ /-/ {
    accretions[sort_key] = posting
}

direction ~ /-/ {
    deductions[sort_key] = posting
}