feat: any:, all: queries allow more powerful transaction matching

This commit is contained in:
Simon Michael 2025-04-18 14:04:10 -10:00
parent d13b334fb2
commit 81238e5f27
2 changed files with 126 additions and 71 deletions

View File

@ -95,17 +95,14 @@ import Hledger.Data.Amount (amountsRaw, mixedAmount, nullamt, usd)
import Hledger.Data.Dates
import Hledger.Data.Posting
import Hledger.Data.Transaction
import Data.Bifunctor
-- | A query is a composition of search criteria, which can be used to
-- match postings, transactions, accounts and more.
data Query =
-- compound queries
Not Query -- ^ negate this match
| And [Query] -- ^ match if all of these match
| Or [Query] -- ^ match if any of these match
-- no-op queries
| Any -- ^ always match
Any -- ^ always match
| None -- ^ never match
-- data queries (in "standard" order, roughly as they appear in a transaction)
| Date DateSpan -- ^ match primary dates in this date span
@ -121,6 +118,14 @@ data Query =
| Real Bool -- ^ match postings with this "realness" value
| Amt OrdPlus Quantity -- ^ match if the amount's numeric quantity is less than/greater than/equal to/unsignedly equal to some value
| Sym Regexp -- ^ match if the commodity symbol is fully-matched by this regexp
-- compound queries (expr:)
| Not Query -- ^ negate this match
| And [Query] -- ^ match if all of these match
| Or [Query] -- ^ match if any of these match
-- compound queries for transactions (any:, all:)
-- If used in a non transaction-matching context, these are equivalent to And.
| AnyPosting [Query] -- ^ match if any one posting is matched by all of these
| AllPostings [Query] -- ^ match if all postings are matched by all of these
deriving (Eq,Show)
instance Default Query where def = Any
@ -263,6 +268,8 @@ queryprefixes = map (<>":") [
,"tag"
,"type"
,"expr"
,"any"
,"all"
]
defaultprefix :: T.Text
@ -308,6 +315,8 @@ parseQueryTerm _ (T.stripPrefix "cur:" -> Just s) = (,[]) . Sym <$> toRegexCI ("
parseQueryTerm _ (T.stripPrefix "tag:" -> Just s) = (,[]) <$> parseTag s
parseQueryTerm _ (T.stripPrefix "type:" -> Just s) = (,[]) <$> parseTypeCodes s
parseQueryTerm d (T.stripPrefix "expr:" -> Just s) = parseBooleanQuery d s
parseQueryTerm d (T.stripPrefix "any:" -> Just s) = first (AnyPosting . (:[])) <$> parseBooleanQuery d s
parseQueryTerm d (T.stripPrefix "all:" -> Just s) = first (AllPostings . (:[])) <$> parseBooleanQuery d s
parseQueryTerm _ "" = Right (Any, [])
parseQueryTerm d s = parseQueryTerm d $ defaultprefix<>":"<>s
@ -805,6 +814,8 @@ matchesAmount (Any) _ = True
matchesAmount (None) _ = False
matchesAmount (Or qs) a = any (`matchesAmount` a) qs
matchesAmount (And qs) a = all (`matchesAmount` a) qs
matchesAmount (AnyPosting qs) a = all (`matchesAmount` a) qs
matchesAmount (AllPostings qs) a = all (`matchesAmount` a) qs
matchesAmount (Amt ord n) a = compareAmount ord n a
matchesAmount (Sym r) a = matchesCommodity (Sym r) (acommodity a)
matchesAmount _ _ = True
@ -834,6 +845,8 @@ matchesAccount (None) _ = False
matchesAccount (Not m) a = not $ matchesAccount m a
matchesAccount (Or ms) a = any (`matchesAccount` a) ms
matchesAccount (And ms) a = all (`matchesAccount` a) ms
matchesAccount (AnyPosting qs) a = all (`matchesAccount` a) qs
matchesAccount (AllPostings qs) a = all (`matchesAccount` a) qs
matchesAccount (Acct r) a = regexMatchText r a
matchesAccount (Depth d) a = accountNameLevel a <= d
matchesAccount (DepthAcct r d) a = accountNameLevel a <= d || not (regexMatchText r a)
@ -852,6 +865,8 @@ matchesAccountExtra :: (AccountName -> Maybe AccountType) -> (AccountName -> [Ta
matchesAccountExtra atypes atags (Not q ) a = not $ matchesAccountExtra atypes atags q a
matchesAccountExtra atypes atags (Or qs ) a = any (\q -> matchesAccountExtra atypes atags q a) qs
matchesAccountExtra atypes atags (And qs ) a = all (\q -> matchesAccountExtra atypes atags q a) qs
matchesAccountExtra atypes atags (AnyPosting qs ) a = all (\q -> matchesAccountExtra atypes atags q a) qs
matchesAccountExtra atypes atags (AllPostings qs ) a = all (\q -> matchesAccountExtra atypes atags q a) qs
matchesAccountExtra atypes _ (Type ts) a = maybe False (\t -> any (t `isAccountSubtypeOf`) ts) $ atypes a
matchesAccountExtra _ atags (Tag npat vpat) a = matchesTags npat vpat $ atags a
matchesAccountExtra _ _ q a = matchesAccount q a
@ -865,6 +880,8 @@ matchesPosting (Any) _ = True
matchesPosting (None) _ = False
matchesPosting (Or qs) p = any (`matchesPosting` p) qs
matchesPosting (And qs) p = all (`matchesPosting` p) qs
matchesPosting (AnyPosting qs) p = all (`matchesPosting` p) qs
matchesPosting (AllPostings qs) p = all (`matchesPosting` p) qs
matchesPosting (Code r) p = maybe False (regexMatchText r . tcode) $ ptransaction p
matchesPosting (Desc r) p = maybe False (regexMatchText r . tdescription) $ ptransaction p
matchesPosting (Acct r) p = matches p || maybe False matches (poriginal p) where matches = regexMatchText r . paccount
@ -889,6 +906,8 @@ matchesPostingExtra :: (AccountName -> Maybe AccountType) -> Query -> Posting ->
matchesPostingExtra atype (Not q ) p = not $ matchesPostingExtra atype q p
matchesPostingExtra atype (Or qs) p = any (\q -> matchesPostingExtra atype q p) qs
matchesPostingExtra atype (And qs) p = all (\q -> matchesPostingExtra atype q p) qs
matchesPostingExtra atype (AnyPosting qs) p = all (\q -> matchesPostingExtra atype q p) qs
matchesPostingExtra atype (AllPostings qs) p = all (\q -> matchesPostingExtra atype q p) qs
matchesPostingExtra atype (Type ts) p =
-- does posting's account's type, if we can detect it, match any of the given types ?
(maybe False (\t -> any (t `isAccountSubtypeOf`) ts) . atype $ paccount p)
@ -908,6 +927,8 @@ matchesTransaction (Any) _ = True
matchesTransaction (None) _ = False
matchesTransaction (Or qs) t = any (`matchesTransaction` t) qs
matchesTransaction (And qs) t = all (`matchesTransaction` t) qs
matchesTransaction (AnyPosting qs) t = any (\p -> all (`matchesPosting` p) qs) $ tpostings t
matchesTransaction (AllPostings qs) t = all (\p -> all (`matchesPosting` p) qs) $ tpostings t
matchesTransaction (Code r) t = regexMatchText r $ tcode t
matchesTransaction (Desc r) t = regexMatchText r $ tdescription t
matchesTransaction q@(Acct _) t = any (q `matchesPosting`) $ tpostings t
@ -932,20 +953,24 @@ matchesTransactionExtra :: (AccountName -> Maybe AccountType) -> Query -> Transa
matchesTransactionExtra atype (Not q) t = not $ matchesTransactionExtra atype q t
matchesTransactionExtra atype (Or qs) t = any (\q -> matchesTransactionExtra atype q t) qs
matchesTransactionExtra atype (And qs) t = all (\q -> matchesTransactionExtra atype q t) qs
matchesTransactionExtra atype (AnyPosting qs) t = any (\p -> all (\q -> matchesPostingExtra atype q p) qs) $ tpostings t
matchesTransactionExtra atype (AllPostings qs) t = all (\p -> all (\q -> matchesPostingExtra atype q p) qs) $ tpostings t
matchesTransactionExtra atype q@(Type _) t = any (matchesPostingExtra atype q) $ tpostings t
matchesTransactionExtra _ q t = matchesTransaction q t
-- | Does the query match this transaction description ?
-- Tests desc: terms, any other terms are ignored.
matchesDescription :: Query -> Text -> Bool
matchesDescription (Not q) d = not $ q `matchesDescription` d
matchesDescription (Any) _ = True
matchesDescription (None) _ = False
matchesDescription (Or qs) d = any (`matchesDescription` d) $ filter queryIsDesc qs
matchesDescription (And qs) d = all (`matchesDescription` d) $ filter queryIsDesc qs
matchesDescription (Code _) _ = False
matchesDescription (Desc r) d = regexMatchText r d
matchesDescription _ _ = False
matchesDescription (Not q) d = not $ q `matchesDescription` d
matchesDescription (Any) _ = True
matchesDescription (None) _ = False
matchesDescription (Or qs) d = any (`matchesDescription` d) $ filter queryIsDesc qs
matchesDescription (And qs) d = all (`matchesDescription` d) $ filter queryIsDesc qs
matchesDescription (AnyPosting qs) d = all (`matchesDescription` d) $ filter queryIsDesc qs
matchesDescription (AllPostings qs) d = all (`matchesDescription` d) $ filter queryIsDesc qs
matchesDescription (Code _) _ = False
matchesDescription (Desc r) d = regexMatchText r d
matchesDescription _ _ = False
-- | Does the query match this transaction payee ?
-- Tests desc: (and payee: ?) terms, any other terms are ignored.
@ -962,14 +987,16 @@ matchesTags namepat valuepat = any (matches namepat valuepat)
-- | Does the query match this market price ?
matchesPriceDirective :: Query -> PriceDirective -> Bool
matchesPriceDirective (None) _ = False
matchesPriceDirective (Not q) p = not $ matchesPriceDirective q p
matchesPriceDirective (Or qs) p = any (`matchesPriceDirective` p) qs
matchesPriceDirective (And qs) p = all (`matchesPriceDirective` p) qs
matchesPriceDirective q@(Amt _ _) p = matchesAmount q (pdamount p)
matchesPriceDirective q@(Sym _) p = matchesCommodity q (pdcommodity p)
matchesPriceDirective (Date spn) p = spanContainsDate spn (pddate p)
matchesPriceDirective _ _ = True
matchesPriceDirective (None) _ = False
matchesPriceDirective (Not q) p = not $ matchesPriceDirective q p
matchesPriceDirective (Or qs) p = any (`matchesPriceDirective` p) qs
matchesPriceDirective (And qs) p = all (`matchesPriceDirective` p) qs
matchesPriceDirective (AnyPosting qs) p = all (`matchesPriceDirective` p) qs
matchesPriceDirective (AllPostings qs) p = all (`matchesPriceDirective` p) qs
matchesPriceDirective q@(Amt _ _) p = matchesAmount q (pdamount p)
matchesPriceDirective q@(Sym _) p = matchesCommodity q (pdcommodity p)
matchesPriceDirective (Date spn) p = spanContainsDate spn (pddate p)
matchesPriceDirective _ _ = True
-- tests
@ -1137,4 +1164,19 @@ tests_Query = testGroup "Query" [
-- a tag match on a transaction also matches posting tags
assertBool "" $ (Tag (toRegex' "postingtag") Nothing) `matchesTransaction` nulltransaction{tpostings=[nullposting{ptags=[("postingtag","")]}]}
-- hledger print expr:'cash and amt:>0' means "show transactions with (at least one posting involving a cash account) and (at least one posting with a positive amount)"
let exprq = And [Acct $ toRegex' "cash", Amt Gt 0]
assertBool "" $ exprq `matchesTransaction` nulltransaction{tpostings=[nullposting{paccount="cash", pamount=1}]}
assertBool "" $ exprq `matchesTransaction` nulltransaction{tpostings = [nullposting{paccount="cash"}, nullposting{paccount="food", pamount=1}]}
-- hledger print any:'cash and amt:>0' means "show transactions where at least one posting posts a positive amount to a cash account".
let anyq = AnyPosting [Acct $ toRegex' "cash", Amt Gt 0]
assertBool "" $ anyq `matchesTransaction` nulltransaction{tpostings=[nullposting{paccount="cash", pamount=1}]}
assertBool "" $ not $ anyq `matchesTransaction` nulltransaction{tpostings = [nullposting{paccount="cash"}, nullposting{paccount="food", pamount=1}]}
-- hledger print all:'cash and amt:0' means "show transactions where all postings involve a cash account and have a zero amount".
assertBool "" $ AllPostings [Amt Eq 0] `matchesTransaction`
nulltransaction{tpostings = [nullposting{paccount = "cash"}, nullposting{paccount = "food"}]}
assertBool "" $ not $ AllPostings [Acct $ toRegex' "cash", Amt Eq 0] `matchesTransaction`
nulltransaction{tpostings = [nullposting{paccount = "cash"}, nullposting{paccount = "food"}]}
]

View File

@ -5240,29 +5240,31 @@ The same would be true with the argument `--depth assets=1 --depth savings=2`.
# Queries
One of hledger's strengths is being able to quickly report on a precise subset of your data.
Most hledger commands accept query arguments, to restrict their scope.
Multiple query terms can be provided to build up a more complex query.
Most hledger commands accept query arguments, which restrict their scope and let you report on a precise subset of your data.
Here's a quick overview of hledger's query language:
- By default, a query term is interpreted as a case-insensitive substring pattern for matching [account names](#account-names):
- An argument with no recognised query prefix is interpreted as
a case-insensitive substring pattern for matching [account names](#account-names).
Eg:
`car:fuel`\
`dining groceries`\
`car:fuel`\
- Patterns containing spaces or other [special characters](#special-characters) must be enclosed in single or double quotes:
`'personal care'`\
- These patterns are actually regular expressions,
- Patterns are actually regular expressions,
so you can add regexp metacharacters for more precision
(see "[Regular expressions](#regular-expressions)" above for details):
(or you may need to backslash-escape certain characters;
see "[Regular expressions](#regular-expressions)" above):
`'^expenses\b'`\
`'food$'`\
`'fuel|repair'`\
`'accounts (payable|receivable)'`\
- To match something other than account name, add one of the query type prefixes described in "Query types" below:
- To match something other than the account name, you can add a query type prefix, such as:
`date:202312-`\
`status:`\
@ -5271,25 +5273,23 @@ Multiple query terms can be provided to build up a more complex query.
`cur:\\$`\
`amt:'>0'`\
- Add a `not:` prefix to negate a term:
- To negate a query, add a `not:` prefix:
`not:status:'*'`\
`not:desc:'opening|closing'`\
`not:cur:USD`\
- Terms with different types are AND-ed, terms with the same type are OR-ed (mostly; see "Combining query terms" below).
The following query:
`date:2022 desc:amazon desc:amzn`
is interpreted as:
*date is in 2022 AND ( transaction description contains "amazon" OR "amzn" )*
- If you provide multiple query terms as command line arguments,
the terms with different types will be AND-ed, while
the terms with the same type will be OR-ed (mostly).\
So, `hledger print date:2022 desc:amazon desc:amzn`
means "show transactions where the date is in 2022 AND the description contains (amazon OR amzn)".
More flexible query combining is described below.
## Query types
Here are the types of query term available.
Here are the query types available:
### acct: query
**`acct:REGEX`**, or just **`REGEX`**\
@ -5339,18 +5339,6 @@ Match (or display, depending on command) accounts at or above this depth,
optionally only for accounts matching a provided regular expression.
See [Depth](#depth) for detailed rules.
### expr: query
**`expr:'QUERYEXPR'`**\
`expr` lets you write more complicated query expressions with AND, OR, NOT, and parentheses.\
Eg: `expr:'date:lastmonth and not (food or rent)'`\
The expression should be enclosed in quotes. See [Combining query terms](#combining-query-terms) below.
### not: query
**`not:QUERYTERM`**\
You can prepend **`not:`** to any other query term to negate the match.\
Eg: `not:equity`, `not:desc:apple`\
(Also, a trick: `not:not:...` can sometimes solve query problems conveniently.)
### note: query
**`note:REGEX`**\
Match transaction [notes](#payee-and-note)
@ -5390,7 +5378,15 @@ Match by tag name, and optionally also by tag value. Note:
- Postings also inherit the tags of their account and their transaction.
- Transactions also acquire the tags of their postings.
## Combining query terms
## Negative queries
### not: query
**`not:QUERY`**\
You can prepend **`not:`** to a query to negate the match.\
Eg: `not:equity`, `not:desc:apple`\
(Also, a trick: `not:not:...` can sometimes solve query problems conveniently.)
## Space-separated queries
When given multiple space-separated query terms, most commands select things which match:
@ -5406,32 +5402,49 @@ The [print](#print) command is a little different, showing transactions which:
- have no postings matching any of the negative account terms AND
- match all the other terms.
We also support more complex boolean queries with the `expr:` prefix.
This allows one to combine query terms using `and`, `or`, `not` keywords (case insensitive),
and to group them by enclosing in parentheses.
## Boolean queries
These are more complicated query expressions made by combining smaller queries
with AND, OR, NOT (case insensitive), and parentheses for grouping.
The query expression must be written inside quotes, following a prefix (not as separate command line arguments).
Also, there is a restriction: `date:` queries may not be used inside OR expressions.
<!-- That would allow disjoint report periods or unclear semantics for our reports. -->
There are three types of boolean query: `expr:`, `any:`, and `all:`.
### expr: query
**`expr:'QUERYEXPR'`**\
Some examples:
- Exclude account names containing 'food':
`expr:'date:lastmonth AND NOT (food OR rent)'` means
"match things which are dated in the last month and do not have food or rent in the account name".
(AND is the default, so could be omitted here.)
`expr:"not food"` (`not:food` is equivalent)
`expr:'expenses:food or (expenses:drink tag:A)'` means
"match things which reference the "expenses:food" account,
or which reference the "expenses:drink" account and also have a tag with an A in its name".
- Match things which have 'cool' in the description and the 'A' tag:
When using `expr:` with transaction-oriented commands like `print`,
note that posting-oriented query terms like `acct:` and `amt:` are considered to match the transaction
if they match any of its postings.\
So, `hledger print expr:'cash and amt:>0'`
means "show transactions with (at least one posting involving a cash account) and (at least one posting with a positive amount)".
`expr:"desc:cool and tag:A"` (`expr:"desc:cool tag:A"` is equivalent)
- Match things which either do not reference the 'expenses:food' account, or do have the 'A' tag:
`expr:"not expenses:food or tag:A"`
- Match things which either reference the 'expenses:food' account,
or which reference the 'expenses:drink' account and also have the 'A' tag:
`expr:"expenses:food or (expenses:drink and tag:A)"`
`expr:` has a restriction: `date:` queries may not be used inside `or` expressions.
That would allow disjoint report periods or disjoint result sets, with unclear semantics for our reports.
### any: query
**`any:'QUERYEXPR'`**\
Like `expr:`, but when used with transaction-oriented commands like `print`,
it matches the transaction only if at least one of its postings is matched by all of QUERYEXPR.\
So, `hledger print any:'cash and amt:>0'`
means "show transactions where at least one posting posts a positive amount to a cash account".
### all: query
**`all:'QUERYEXPR'`**\
Like `expr:`, but when used with transaction-oriented commands like `print`,
it matches the transaction only if all postings are matched by all of QUERYEXPR.\
So, `hledger print all:'cash and amt:0'`
means "show transactions where all postings involve a cash account and have a zero amount".\
Or, `hledger print all:'cash or checking'`
means "show transactions which touch only cash and/or checking accounts".
## Queries and command options