From e360e50497f4c148a72756e2c72f987aca0b29cd Mon Sep 17 00:00:00 2001
From: Simon Michael <simon@joyful.com>
Date: Thu, 22 May 2025 16:48:22 -1000
Subject: [PATCH] imp:csv: more --debug=2 output for if rules

Also, in debug output, show records more like what the matchers see,
i.e. with quotes removed.
---
 hledger-lib/Hledger/Read/RulesReader.hs      | 56 ++++++++++++++++----
 hledger-lib/Hledger/Utils/Debug.hs           | 11 ++--
 hledger/test/errors/csvamountonenonzero.test |  2 +-
 hledger/test/errors/csvamountparse.test      |  2 +-
 hledger/test/errors/csvbalanceparse.test     |  2 +-
 hledger/test/errors/csvbalancetypeparse.test |  2 +-
 hledger/test/errors/csvdateformat.test       |  2 +-
 hledger/test/errors/csvdateparse.test        |  2 +-
 8 files changed, 56 insertions(+), 23 deletions(-)

diff --git a/hledger-lib/Hledger/Read/RulesReader.hs b/hledger-lib/Hledger/Read/RulesReader.hs
index d3748debe..d4c99b5e9 100644
--- a/hledger-lib/Hledger/Read/RulesReader.hs
+++ b/hledger-lib/Hledger/Read/RulesReader.hs
@@ -86,6 +86,7 @@ import Hledger.Read.Common (aliasesFromOpts, Reader(..), InputOpts(..), amountp,
 import Hledger.Write.Csv
 import System.Directory (doesFileExist, getHomeDirectory)
 import Data.Either (fromRight)
+import Control.DeepSeq (deepseq)
 
 --- ** doctest setup
 -- $setup
@@ -178,7 +179,8 @@ readRulesFile f =
 
 -- | Inline all files referenced by include directives in this hledger CSV rules text, recursively.
 -- Included file paths may be relative to the directory of the provided file path.
--- This is done as a pre-parse step to simplify the CSV rules parser.
+-- Unlike with journal files, this is done as a pre-parse step to simplify the CSV rules parser.
+-- Unfortunately this means that the parser won't see accurate file paths and positions with included files.
 expandIncludes :: FilePath -> Text -> IO Text
 expandIncludes dir0 content = mapM (expandLine dir0) (T.lines content) <&> T.unlines
   where
@@ -315,11 +317,16 @@ type DateFormat       = Text
 -- interpreted or combined with other matchers.
 data MatcherPrefix =
     Or      -- ^ no prefix
-  | And     -- ^ &
+  | And     -- ^ &&
   | Not     -- ^ !
-  | AndNot  -- ^ & !
+  | AndNot  -- ^ && !
   deriving (Show, Eq)
 
+dbgShowMatcherPrefix Or = ""          -- no prefix is shown for Or
+dbgShowMatcherPrefix And = "&&"       -- shown as: &&
+dbgShowMatcherPrefix Not = "!"        -- shown as: !  (must differ from And, or negation is hidden in debug output)
+dbgShowMatcherPrefix AndNot = "&& !"  -- shown as: && !
+
 -- | A single test for matching a CSV record, in one way or another.
 data Matcher =
     RecordMatcher MatcherPrefix Regexp                          -- ^ match if this regexp matches the overall CSV record
@@ -334,6 +341,11 @@ matcherSetPrefix :: MatcherPrefix -> Matcher -> Matcher
 matcherSetPrefix p (RecordMatcher _ r)  = RecordMatcher p r
 matcherSetPrefix p (FieldMatcher _ f r) = FieldMatcher p f r
 
+-- | Show a matcher for debug output: its prefix (if any), its field name (if any), then the quoted regexp.
+dbgShowMatcher m = unwords $ filter (not . null) [dbgShowMatcherPrefix $ matcherPrefix m] ++ case m of
+  RecordMatcher _ r  -> [show $ reString r]
+  FieldMatcher _ f r -> [T.unpack f, show $ reString r]
+
 -- | A conditional block: a set of CSV record matchers, and a sequence
 -- of rules which will be enabled only if one or more of the matchers
 -- succeeds.
@@ -346,6 +358,9 @@ data ConditionalBlock = CB {
   ,cbAssignments :: [(HledgerFieldName, FieldTemplate)]
   } deriving (Show, Eq)
 
+dbgShowConditionalBlock :: ConditionalBlock -> String  -- the block's matchers, space-separated, for debug output
+dbgShowConditionalBlock cb = unwords [dbgShowMatcher m | m <- cbMatchers cb]
+
 defrules :: CsvRulesParsed
 defrules = CsvRules' {
   rdirectives=[],
@@ -784,15 +799,22 @@ getEffectiveAssignment
 getEffectiveAssignment rules record f = lastMay assignments
   where
     -- all active assignments to field f, in order
-    assignments = dbg9 "csv assignments" $ toplevelassignments ++ conditionalassignments
+    assignments = toplevelassignments ++ conditionalassignments
     -- all top level field assignments
     toplevelassignments    = map (Left . snd) $ filter ((==f).fst) $ rassignments rules
     -- all conditional blocks assigning to field f and active for the current csv record
     conditionalassignments = map Right
                            $ filter (any (==f) . map fst . cbAssignments)
+                           $ dbg'
                            $ filter (isBlockActive rules record)
                            $ (rblocksassigning rules) f
 
+    dbg' [] = []
+    dbg' ms = dbg2Msg (
+      " for the " ++ T.unpack f ++ " field, these if rules matched:"
+      ++ concatMap (("\n  " ++) . dbgShowConditionalBlock) ms
+      ) ms
+
 -- does this conditional block match the current csv record ?
 isBlockActive :: CsvRules -> CsvRecord -> ConditionalBlock -> Bool
 isBlockActive rules record CB{..} = any (all matcherMatches) $ groupedMatchers cbMatchers
@@ -812,7 +834,7 @@ isBlockActive rules record CB{..} = any (all matcherMatches) $ groupedMatchers c
     --
     matcherMatches :: Matcher -> Bool
     matcherMatches = \case
-      RecordMatcher prefix             pat -> maybeNegate prefix $ match pat $ T.intercalate "," record
+      RecordMatcher prefix             pat -> maybeNegate prefix $ match pat $ recordAsApproximateText record
       FieldMatcher  prefix csvfieldref pat -> maybeNegate prefix $ match pat $
         fromMaybe "" $ replaceCsvFieldReference rules record csvfieldref
         -- (warn msg "") where msg = "if "<>T.unpack csvfieldref<>": this should be a name declared with 'fields', or %NUM"
@@ -835,6 +857,13 @@ isBlockActive rules record CB{..} = any (all matcherMatches) $ groupedMatchers c
         (andandnots, rest) = span (\a -> matcherPrefix a `elem` [And, AndNot]) ms
         ands = [matcherSetPrefix p a | a <- andandnots, let p = if matcherPrefix a == AndNot then Not else And]
 
+-- | Render a CSV record as a single text value, as used for whole-record matching.
+-- The result only approximates the original record:
+-- values are always joined with commas,
+-- and any enclosing quotes and whitespace outside those quotes will have been removed.
+recordAsApproximateText :: CsvRecord -> Text
+recordAsApproximateText fields = T.intercalate "," fields
+
 -- | Render a field assignment's template, possibly interpolating referenced
 -- CSV field values or match groups. Outer whitespace is removed from interpolated values.
 renderTemplate ::  CsvRules -> CsvRecord -> FieldTemplate -> Text
@@ -877,7 +906,7 @@ regexMatchValue rules record sgroup = let
 
 getMatchGroups :: CsvRules -> CsvRecord -> Matcher -> [Text]
 getMatchGroups _ record (RecordMatcher _ regex) =
-  regexMatchTextGroups regex $ T.intercalate "," record -- see caveats in matcherMatches
+  regexMatchTextGroups regex $ recordAsApproximateText record  -- groups might be wrong
 getMatchGroups rules record (FieldMatcher _ fieldref regex) =
   regexMatchTextGroups regex $ fromMaybe "" $ replaceCsvFieldReference rules record fieldref
 
@@ -1105,8 +1134,13 @@ validateCsv rs@(_first:_) =
 --- ** converting csv records to transactions
 
 transactionFromCsvRecord :: Bool -> Maybe TimeZone -> TimeZone -> SourcePos -> CsvRules -> CsvRecord -> Transaction
-transactionFromCsvRecord timesarezoned mtzin tzout sourcepos rules record = t
+transactionFromCsvRecord timesarezoned mtzin tzout sourcepos rules record =
+  -- log the record and all the transaction fields from this record
+  -- XXX avoid possibly-pessimising deepseq if not needed for debug output ?
+  dbg2Msg (T.unpack $ showRecord record) $ deepseq t
+  t
   where
+
     ----------------------------------------------------------------------
     -- 1. Define some helpers:
 
@@ -1375,10 +1409,6 @@ showRules rules record = T.unlines $ catMaybes
   [ (("the "<>fld<>" rule is: ")<>) <$>
     hledgerField rules record fld | fld <- journalfieldnames ]
 
--- | Show a (approximate) recreation of the original CSV record.
-showRecord :: CsvRecord -> Text
-showRecord r = "CSV record: "<>T.intercalate "," (map (wrap "\"" "\"") r)
-
 -- XXX unify these ^v
 
 -- | Almost but not quite the same as parseAmount.
@@ -1402,6 +1432,10 @@ parseBalanceAmount rules record currency n s =
       ,"the parse error is:      "<> T.pack (customErrorBundlePretty e)
       ]
 
+-- | Render the approximate text of a CSV record, labelled "record: ", for debug output and error messages.
+showRecord :: CsvRecord -> Text
+showRecord r = "record: " <> recordAsApproximateText r
+
 -- Read a valid decimal mark from the decimal-mark rule, if any.
 -- If the rule is present with an invalid argument, raise an error.
 parseDecimalMark :: CsvRules -> Maybe DecimalMark
diff --git a/hledger-lib/Hledger/Utils/Debug.hs b/hledger-lib/Hledger/Utils/Debug.hs
index 2559d3a51..9efb0bb61 100644
--- a/hledger-lib/Hledger/Utils/Debug.hs
+++ b/hledger-lib/Hledger/Utils/Debug.hs
@@ -127,9 +127,6 @@ It's not yet possible to select debug output by topic; that would be useful.
 module Hledger.Utils.Debug (
 
   -- * Debug level
-  -- | This is parsed from a command line --debug N option, or --debug meaning 1.
-  -- The command line is read (once) by unsafePerformIO, allowing this to be used
-  -- easily anywhere in your program.
   debugLevel
 
   -- * Trace/log a string
@@ -248,9 +245,11 @@ progName =
   then reverse $ drop 4 $ reverse modifiedProgName
   else modifiedProgName
 
--- | The programs debug output verbosity. The default is 0 meaning no debug output.
--- The @--debug@ command line flag sets it to 1, or @--debug=N@ sets it to
--- a higher value (the = is required). Uses unsafePerformIO. 
+-- | The program's debug output verbosity, from 0 to 9.
+-- The default is 0 meaning no debug output.
+-- This can be overridden by running the program with a --debug [1-9] command line option;
+-- a --debug flag with no value means 1.
+-- Uses unsafePerformIO to read the command line.
 -- When running in GHCI, changing this requires reloading this module.
 debugLevel :: Int
 debugLevel = case dropWhile (/="--debug") progArgs of
diff --git a/hledger/test/errors/csvamountonenonzero.test b/hledger/test/errors/csvamountonenonzero.test
index 567b39896..7647280e2 100644
--- a/hledger/test/errors/csvamountonenonzero.test
+++ b/hledger/test/errors/csvamountonenonzero.test
@@ -1,6 +1,6 @@
 $$$ hledger check -f  csvamountonenonzero.csv
 >>>2 /Error: in CSV rules:
-While processing CSV record: "2022-01-03","1","2"
+While processing record: 2022-01-03,1,2
 while calculating amount for posting 1
 rule "amount-in %2" assigned value "1"
 rule "amount-out %3" assigned value "2"
diff --git a/hledger/test/errors/csvamountparse.test b/hledger/test/errors/csvamountparse.test
index 0712c38e3..f94fa4810 100644
--- a/hledger/test/errors/csvamountparse.test
+++ b/hledger/test/errors/csvamountparse.test
@@ -1,6 +1,6 @@
 $$$ hledger check -f  csvamountparse.csv
 >>>2 /Error: could not parse "badamount" as an amount
-CSV record: "2022-01-03","badamount"
+record: 2022-01-03,badamount
 the amount rule is: %2
 the date rule is: %1
 
diff --git a/hledger/test/errors/csvbalanceparse.test b/hledger/test/errors/csvbalanceparse.test
index efbd2fec0..e46e19d17 100644
--- a/hledger/test/errors/csvbalanceparse.test
+++ b/hledger/test/errors/csvbalanceparse.test
@@ -1,6 +1,6 @@
 $$$ hledger check -f  csvbalanceparse.csv
 >>>2 /Error: could not parse "badbalance" as balance1 amount
-CSV record: "2022-01-03","badbalance"
+record: 2022-01-03,badbalance
 the balance rule is: %2
 the date rule is: %1
 
diff --git a/hledger/test/errors/csvbalancetypeparse.test b/hledger/test/errors/csvbalancetypeparse.test
index 99ec53f1b..715f4e615 100644
--- a/hledger/test/errors/csvbalancetypeparse.test
+++ b/hledger/test/errors/csvbalancetypeparse.test
@@ -1,6 +1,6 @@
 $$$ hledger check -f  csvbalancetypeparse.csv
 >>>2 /Error: balance-type "badtype" is invalid. Use =, ==, =\* or ==\*.
-CSV record: "2022-01-01","1"
+record: 2022-01-01,1
 the balance rule is: %2
 the date rule is: %1
 /
diff --git a/hledger/test/errors/csvdateformat.test b/hledger/test/errors/csvdateformat.test
index 6d6c2e419..10fec905b 100644
--- a/hledger/test/errors/csvdateformat.test
+++ b/hledger/test/errors/csvdateformat.test
@@ -1,6 +1,6 @@
 $$$ hledger print -f  csvdateformat.csv
 >>>2 /Error: could not parse "a" as a date using date format "YYYY\/M\/D", "YYYY-M-D" or "YYYY.M.D"
-CSV record: "a","b"
+record: a,b
 the date rule is:   %1
 the date-format is: unspecified
 you may need to change your date rule, add a date-format rule, or change your skip rule
diff --git a/hledger/test/errors/csvdateparse.test b/hledger/test/errors/csvdateparse.test
index 5642537bf..8307dd355 100644
--- a/hledger/test/errors/csvdateparse.test
+++ b/hledger/test/errors/csvdateparse.test
@@ -1,6 +1,6 @@
 $$$ hledger check -f  csvdateparse.csv
 >>>2 /Error: could not parse "baddate" as a date using date format "%Y-%m-%d"
-CSV record: "baddate","b"
+record: baddate,b
 the date rule is:   %1
 the date-format is: %Y-%m-%d
 you may need to change your date rule, change your date-format rule, or change your skip rule