From 81238e5f27a929de8e2e67a212a2d7e79e69ea34 Mon Sep 17 00:00:00 2001 From: Simon Michael Date: Fri, 18 Apr 2025 14:04:10 -1000 Subject: [PATCH] feat: any:, all: queries allow more powerful transaction matching --- hledger-lib/Hledger/Query.hs | 84 +++++++++++++++++++------- hledger/hledger.m4.md | 113 +++++++++++++++++++---------------- 2 files changed, 126 insertions(+), 71 deletions(-) diff --git a/hledger-lib/Hledger/Query.hs b/hledger-lib/Hledger/Query.hs index bd950ffd6..870402b99 100644 --- a/hledger-lib/Hledger/Query.hs +++ b/hledger-lib/Hledger/Query.hs @@ -95,17 +95,14 @@ import Hledger.Data.Amount (amountsRaw, mixedAmount, nullamt, usd) import Hledger.Data.Dates import Hledger.Data.Posting import Hledger.Data.Transaction +import Data.Bifunctor -- | A query is a composition of search criteria, which can be used to -- match postings, transactions, accounts and more. data Query = - -- compound queries - Not Query -- ^ negate this match - | And [Query] -- ^ match if all of these match - | Or [Query] -- ^ match if any of these match -- no-op queries - | Any -- ^ always match + Any -- ^ always match | None -- ^ never match -- data queries (in "standard" order, roughly as they appear in a transaction) | Date DateSpan -- ^ match primary dates in this date span @@ -121,6 +118,14 @@ data Query = | Real Bool -- ^ match postings with this "realness" value | Amt OrdPlus Quantity -- ^ match if the amount's numeric quantity is less than/greater than/equal to/unsignedly equal to some value | Sym Regexp -- ^ match if the commodity symbol is fully-matched by this regexp + -- compound queries (expr:) + | Not Query -- ^ negate this match + | And [Query] -- ^ match if all of these match + | Or [Query] -- ^ match if any of these match + -- compound queries for transactions (any:, all:) + -- If used in a non transaction-matching context, these are equivalent to And. 
+ | AnyPosting [Query] -- ^ match if any one posting is matched by all of these + | AllPostings [Query] -- ^ match if all postings are matched by all of these deriving (Eq,Show) instance Default Query where def = Any @@ -263,6 +268,8 @@ queryprefixes = map (<>":") [ ,"tag" ,"type" ,"expr" + ,"any" + ,"all" ] defaultprefix :: T.Text @@ -308,6 +315,8 @@ parseQueryTerm _ (T.stripPrefix "cur:" -> Just s) = (,[]) . Sym <$> toRegexCI (" parseQueryTerm _ (T.stripPrefix "tag:" -> Just s) = (,[]) <$> parseTag s parseQueryTerm _ (T.stripPrefix "type:" -> Just s) = (,[]) <$> parseTypeCodes s parseQueryTerm d (T.stripPrefix "expr:" -> Just s) = parseBooleanQuery d s +parseQueryTerm d (T.stripPrefix "any:" -> Just s) = first (AnyPosting . (:[])) <$> parseBooleanQuery d s +parseQueryTerm d (T.stripPrefix "all:" -> Just s) = first (AllPostings . (:[])) <$> parseBooleanQuery d s parseQueryTerm _ "" = Right (Any, []) parseQueryTerm d s = parseQueryTerm d $ defaultprefix<>":"<>s @@ -805,6 +814,8 @@ matchesAmount (Any) _ = True matchesAmount (None) _ = False matchesAmount (Or qs) a = any (`matchesAmount` a) qs matchesAmount (And qs) a = all (`matchesAmount` a) qs +matchesAmount (AnyPosting qs) a = all (`matchesAmount` a) qs +matchesAmount (AllPostings qs) a = all (`matchesAmount` a) qs matchesAmount (Amt ord n) a = compareAmount ord n a matchesAmount (Sym r) a = matchesCommodity (Sym r) (acommodity a) matchesAmount _ _ = True @@ -834,6 +845,8 @@ matchesAccount (None) _ = False matchesAccount (Not m) a = not $ matchesAccount m a matchesAccount (Or ms) a = any (`matchesAccount` a) ms matchesAccount (And ms) a = all (`matchesAccount` a) ms +matchesAccount (AnyPosting qs) a = all (`matchesAccount` a) qs +matchesAccount (AllPostings qs) a = all (`matchesAccount` a) qs matchesAccount (Acct r) a = regexMatchText r a matchesAccount (Depth d) a = accountNameLevel a <= d matchesAccount (DepthAcct r d) a = accountNameLevel a <= d || not (regexMatchText r a) @@ -852,6 +865,8 @@ 
matchesAccountExtra :: (AccountName -> Maybe AccountType) -> (AccountName -> [Ta matchesAccountExtra atypes atags (Not q ) a = not $ matchesAccountExtra atypes atags q a matchesAccountExtra atypes atags (Or qs ) a = any (\q -> matchesAccountExtra atypes atags q a) qs matchesAccountExtra atypes atags (And qs ) a = all (\q -> matchesAccountExtra atypes atags q a) qs +matchesAccountExtra atypes atags (AnyPosting qs ) a = all (\q -> matchesAccountExtra atypes atags q a) qs +matchesAccountExtra atypes atags (AllPostings qs ) a = all (\q -> matchesAccountExtra atypes atags q a) qs matchesAccountExtra atypes _ (Type ts) a = maybe False (\t -> any (t `isAccountSubtypeOf`) ts) $ atypes a matchesAccountExtra _ atags (Tag npat vpat) a = matchesTags npat vpat $ atags a matchesAccountExtra _ _ q a = matchesAccount q a @@ -865,6 +880,8 @@ matchesPosting (Any) _ = True matchesPosting (None) _ = False matchesPosting (Or qs) p = any (`matchesPosting` p) qs matchesPosting (And qs) p = all (`matchesPosting` p) qs +matchesPosting (AnyPosting qs) p = all (`matchesPosting` p) qs +matchesPosting (AllPostings qs) p = all (`matchesPosting` p) qs matchesPosting (Code r) p = maybe False (regexMatchText r . tcode) $ ptransaction p matchesPosting (Desc r) p = maybe False (regexMatchText r . tdescription) $ ptransaction p matchesPosting (Acct r) p = matches p || maybe False matches (poriginal p) where matches = regexMatchText r . 
paccount @@ -889,6 +906,8 @@ matchesPostingExtra :: (AccountName -> Maybe AccountType) -> Query -> Posting -> matchesPostingExtra atype (Not q ) p = not $ matchesPostingExtra atype q p matchesPostingExtra atype (Or qs) p = any (\q -> matchesPostingExtra atype q p) qs matchesPostingExtra atype (And qs) p = all (\q -> matchesPostingExtra atype q p) qs +matchesPostingExtra atype (AnyPosting qs) p = all (\q -> matchesPostingExtra atype q p) qs +matchesPostingExtra atype (AllPostings qs) p = all (\q -> matchesPostingExtra atype q p) qs matchesPostingExtra atype (Type ts) p = -- does posting's account's type, if we can detect it, match any of the given types ? (maybe False (\t -> any (t `isAccountSubtypeOf`) ts) . atype $ paccount p) @@ -908,6 +927,8 @@ matchesTransaction (Any) _ = True matchesTransaction (None) _ = False matchesTransaction (Or qs) t = any (`matchesTransaction` t) qs matchesTransaction (And qs) t = all (`matchesTransaction` t) qs +matchesTransaction (AnyPosting qs) t = any (\p -> all (`matchesPosting` p) qs) $ tpostings t +matchesTransaction (AllPostings qs) t = all (\p -> all (`matchesPosting` p) qs) $ tpostings t matchesTransaction (Code r) t = regexMatchText r $ tcode t matchesTransaction (Desc r) t = regexMatchText r $ tdescription t matchesTransaction q@(Acct _) t = any (q `matchesPosting`) $ tpostings t @@ -932,20 +953,24 @@ matchesTransactionExtra :: (AccountName -> Maybe AccountType) -> Query -> Transa matchesTransactionExtra atype (Not q) t = not $ matchesTransactionExtra atype q t matchesTransactionExtra atype (Or qs) t = any (\q -> matchesTransactionExtra atype q t) qs matchesTransactionExtra atype (And qs) t = all (\q -> matchesTransactionExtra atype q t) qs +matchesTransactionExtra atype (AnyPosting qs) t = any (\p -> all (\q -> matchesPostingExtra atype q p) qs) $ tpostings t +matchesTransactionExtra atype (AllPostings qs) t = all (\p -> all (\q -> matchesPostingExtra atype q p) qs) $ tpostings t matchesTransactionExtra atype q@(Type _) t = 
any (matchesPostingExtra atype q) $ tpostings t matchesTransactionExtra _ q t = matchesTransaction q t -- | Does the query match this transaction description ? -- Tests desc: terms, any other terms are ignored. matchesDescription :: Query -> Text -> Bool -matchesDescription (Not q) d = not $ q `matchesDescription` d -matchesDescription (Any) _ = True -matchesDescription (None) _ = False -matchesDescription (Or qs) d = any (`matchesDescription` d) $ filter queryIsDesc qs -matchesDescription (And qs) d = all (`matchesDescription` d) $ filter queryIsDesc qs -matchesDescription (Code _) _ = False -matchesDescription (Desc r) d = regexMatchText r d -matchesDescription _ _ = False +matchesDescription (Not q) d = not $ q `matchesDescription` d +matchesDescription (Any) _ = True +matchesDescription (None) _ = False +matchesDescription (Or qs) d = any (`matchesDescription` d) $ filter queryIsDesc qs +matchesDescription (And qs) d = all (`matchesDescription` d) $ filter queryIsDesc qs +matchesDescription (AnyPosting qs) d = all (`matchesDescription` d) $ filter queryIsDesc qs +matchesDescription (AllPostings qs) d = all (`matchesDescription` d) $ filter queryIsDesc qs +matchesDescription (Code _) _ = False +matchesDescription (Desc r) d = regexMatchText r d +matchesDescription _ _ = False -- | Does the query match this transaction payee ? -- Tests desc: (and payee: ?) terms, any other terms are ignored. @@ -962,14 +987,16 @@ matchesTags namepat valuepat = any (matches namepat valuepat) -- | Does the query match this market price ? 
matchesPriceDirective :: Query -> PriceDirective -> Bool -matchesPriceDirective (None) _ = False -matchesPriceDirective (Not q) p = not $ matchesPriceDirective q p -matchesPriceDirective (Or qs) p = any (`matchesPriceDirective` p) qs -matchesPriceDirective (And qs) p = all (`matchesPriceDirective` p) qs -matchesPriceDirective q@(Amt _ _) p = matchesAmount q (pdamount p) -matchesPriceDirective q@(Sym _) p = matchesCommodity q (pdcommodity p) -matchesPriceDirective (Date spn) p = spanContainsDate spn (pddate p) -matchesPriceDirective _ _ = True +matchesPriceDirective (None) _ = False +matchesPriceDirective (Not q) p = not $ matchesPriceDirective q p +matchesPriceDirective (Or qs) p = any (`matchesPriceDirective` p) qs +matchesPriceDirective (And qs) p = all (`matchesPriceDirective` p) qs +matchesPriceDirective (AnyPosting qs) p = all (`matchesPriceDirective` p) qs +matchesPriceDirective (AllPostings qs) p = all (`matchesPriceDirective` p) qs +matchesPriceDirective q@(Amt _ _) p = matchesAmount q (pdamount p) +matchesPriceDirective q@(Sym _) p = matchesCommodity q (pdcommodity p) +matchesPriceDirective (Date spn) p = spanContainsDate spn (pddate p) +matchesPriceDirective _ _ = True -- tests @@ -1137,4 +1164,19 @@ tests_Query = testGroup "Query" [ -- a tag match on a transaction also matches posting tags assertBool "" $ (Tag (toRegex' "postingtag") Nothing) `matchesTransaction` nulltransaction{tpostings=[nullposting{ptags=[("postingtag","")]}]} + -- hledger print expr:'cash and amt:>0' means "show transactions with (at least one posting involving a cash account) and (at least one posting with a positive amount)" + let exprq = And [Acct $ toRegex' "cash", Amt Gt 0] + assertBool "" $ exprq `matchesTransaction` nulltransaction{tpostings=[nullposting{paccount="cash", pamount=1}]} + assertBool "" $ exprq `matchesTransaction` nulltransaction{tpostings = [nullposting{paccount="cash"}, nullposting{paccount="food", pamount=1}]} + + -- hledger print any:'cash and amt:>0' means 
"show transactions where at least one posting posts a positive amount to a cash account". + let anyq = AnyPosting [Acct $ toRegex' "cash", Amt Gt 0] + assertBool "" $ anyq `matchesTransaction` nulltransaction{tpostings=[nullposting{paccount="cash", pamount=1}]} + assertBool "" $ not $ anyq `matchesTransaction` nulltransaction{tpostings = [nullposting{paccount="cash"}, nullposting{paccount="food", pamount=1}]} + + -- hledger print all:'cash and amt:0' means "show transactions where all postings involve a cash account and have a zero amount". + assertBool "" $ AllPostings [Amt Eq 0] `matchesTransaction` + nulltransaction{tpostings = [nullposting{paccount = "cash"}, nullposting{paccount = "food"}]} + assertBool "" $ not $ AllPostings [Acct $ toRegex' "cash", Amt Eq 0] `matchesTransaction` + nulltransaction{tpostings = [nullposting{paccount = "cash"}, nullposting{paccount = "food"}]} ] diff --git a/hledger/hledger.m4.md b/hledger/hledger.m4.md index 09a45204b..3bd12f0da 100644 --- a/hledger/hledger.m4.md +++ b/hledger/hledger.m4.md @@ -5240,29 +5240,31 @@ The same would be true with the argument `--depth assets=1 --depth savings=2`. # Queries -One of hledger's strengths is being able to quickly report on a precise subset of your data. -Most hledger commands accept query arguments, to restrict their scope. -Multiple query terms can be provided to build up a more complex query. +Most hledger commands accept query arguments, which restrict their scope and let you report on a precise subset of your data. +Here's a quick overview of hledger's query language: -- By default, a query term is interpreted as a case-insensitive substring pattern for matching [account names](#account-names): +- An argument with no recognised query prefix is interpreted as + a case-insensitive substring pattern for matching [account names](#account-names). 
+ Eg: - `car:fuel`\ `dining groceries`\ + `car:fuel`\ - Patterns containing spaces or other [special characters](#special-characters) must be enclosed in single or double quotes: `'personal care'`\ -- These patterns are actually regular expressions, +- Patterns are actually regular expressions, so you can add regexp metacharacters for more precision - (see "[Regular expressions](#regular-expressions)" above for details): + (or you may need to backslash-escape certain characters; + see "[Regular expressions](#regular-expressions)" above): `'^expenses\b'`\ `'food$'`\ `'fuel|repair'`\ `'accounts (payable|receivable)'`\ -- To match something other than account name, add one of the query type prefixes described in "Query types" below: +- To match something other than the account name, you can add a query type prefix, such as: `date:202312-`\ `status:`\ @@ -5271,25 +5273,23 @@ Multiple query terms can be provided to build up a more complex query. `cur:\\$`\ `amt:'>0'`\ -- Add a `not:` prefix to negate a term: +- To negate a query, add a `not:` prefix: `not:status:'*'`\ `not:desc:'opening|closing'`\ `not:cur:USD`\ -- Terms with different types are AND-ed, terms with the same type are OR-ed (mostly; see "Combining query terms" below). - The following query: - - `date:2022 desc:amazon desc:amzn` - - is interpreted as: - - *date is in 2022 AND ( transaction description contains "amazon" OR "amzn" )* +- If you provide multiple query terms as command line arguments, + the terms with different types will be AND-ed, while + the terms with the same type will be OR-ed (mostly).\ + So, `hledger print date:2022 desc:amazon desc:amzn` + means "show transactions where the date is in 2022 AND the description contains (amazon OR amzn)". + More flexible query combining is described below. ## Query types -Here are the types of query term available. 
+Here are the query types available: ### acct: query **`acct:REGEX`**, or just **`REGEX`**\ @@ -5339,18 +5339,6 @@ Match (or display, depending on command) accounts at or above this depth, optionally only for accounts matching a provided regular expression. See [Depth](#depth) for detailed rules. -### expr: query -**`expr:'QUERYEXPR'`**\ -`expr` lets you write more complicated query expressions with AND, OR, NOT, and parentheses.\ -Eg: `expr:'date:lastmonth and not (food or rent)'`\ -The expression should be enclosed in quotes. See [Combining query terms](#combining-query-terms) below. - -### not: query -**`not:QUERYTERM`**\ -You can prepend **`not:`** to any other query term to negate the match.\ -Eg: `not:equity`, `not:desc:apple`\ -(Also, a trick: `not:not:...` can sometimes solve query problems conveniently..) - ### note: query **`note:REGEX`**\ Match transaction [notes](#payee-and-note) @@ -5390,7 +5378,15 @@ Match by tag name, and optionally also by tag value. Note: - Postings also inherit the tags of their account and their transaction . - Transactions also acquire the tags of their postings. -## Combining query terms +## Negative queries + +### not: query +**`not:QUERY`**\ +You can prepend **`not:`** to a query to negate the match.\ +Eg: `not:equity`, `not:desc:apple`\ +(Also, a trick: `not:not:...` can sometimes solve query problems conveniently.) + +## Space-separated queries When given multiple space-separated query terms, most commands select things which match: @@ -5406,32 +5402,49 @@ The [print](#print) command is a little different, showing transactions which: - have no postings matching any of the negative account terms AND - match all the other terms. -We also support more complex boolean queries with the `expr:` prefix. -This allows one to combine query terms using `and`, `or`, `not` keywords (case insensitive), -and to group them by enclosing in parentheses. 
+## Boolean queries +These are more complicated query expressions made by combining smaller queries +with AND, OR, NOT (case insensitive), and parentheses for grouping. +The query expression must be written inside quotes, following a prefix (not as separate command line arguments). +Also, there is a restriction: `date:` queries may not be used inside OR expressions. + + +There are three types of boolean query: `expr:`, `any:`, and `all:`. + +### expr: query +**`expr:'QUERYEXPR'`**\ Some examples: -- Exclude account names containing 'food': +`expr:'date:lastmonth AND NOT (food OR rent)'` means +"match things which are dated in the last month and do not have food or rent in the account name". +(AND is the default, so could be omitted here.) - `expr:"not food"` (`not:food` is equivalent) +`expr:'expenses:food or (expenses:drink tag:A)'` means +"match things which reference the "expenses:food" account, +or which reference the "expenses:drink" account and also have a tag with an A in its name". -- Match things which have 'cool' in the description and the 'A' tag: +When using `expr:` with transaction-oriented commands like `print`, +note that posting-oriented query terms like `acct:` and `amt:` are considered to match the transaction +if they match any of its postings.\ +So, `hledger print expr:'cash and amt:>0'` +means "show transactions with (at least one posting involving a cash account) and (at least one posting with a positive amount)". - `expr:"desc:cool and tag:A"` (`expr:"desc:cool tag:A"` is equivalent) - -- Match things which either do not reference the 'expenses:food' account, or do have the 'A' tag: - - `expr:"not expenses:food or tag:A"` - -- Match things which either do not reference the 'expenses:food' account, - or which reference the 'expenses:drink' account and also have the 'A' tag: - - `expr:"expenses:food or (expenses:drink and tag:A)"` - -`expr:` has a restriction: `date:` queries may not be used inside `or` expressions. 
-That would allow disjoint report periods or disjoint result sets, with unclear semantics for our reports. +### any: query +**`any:'QUERYEXPR'`**\ +Like `expr:`, but when used with transaction-oriented commands like `print`, +it matches the transaction only if at least one of its postings is matched by all of QUERYEXPR.\ +So, `hledger print any:'cash and amt:>0'` +means "show transactions where at least one posting posts a positive amount to a cash account". +### all: query +**`all:'QUERYEXPR'`**\ +Like `expr:`, but when used with transaction-oriented commands like `print`, +it matches the transaction only if all postings are matched by all of QUERYEXPR.\ +So, `hledger print all:'cash and amt:0'` +means "show transactions where all postings involve a cash account and have a zero amount".\ +Or, `hledger print all:'cash or checking'` +means "show transactions which touch only cash and/or checking accounts". ## Queries and command options