imp:csv: more --debug=2 output for if rules

Also, in debug output, show records more like what the matchers see,
i.e. with enclosing quotes removed.
Simon Michael 2025-05-22 16:48:22 -10:00
parent 9474d808dc
commit e360e50497
8 changed files with 56 additions and 23 deletions

View File

@ -86,6 +86,7 @@ import Hledger.Read.Common (aliasesFromOpts, Reader(..), InputOpts(..), amountp,
import Hledger.Write.Csv
import System.Directory (doesFileExist, getHomeDirectory)
import Data.Either (fromRight)
import Control.DeepSeq (deepseq)
--- ** doctest setup
-- $setup
@ -178,7 +179,8 @@ readRulesFile f =
-- | Inline all files referenced by include directives in this hledger CSV rules text, recursively.
-- Included file paths may be relative to the directory of the provided file path.
-- This is done as a pre-parse step to simplify the CSV rules parser.
-- Unlike with journal files, this is done as a pre-parse step to simplify the CSV rules parser.
-- Unfortunately this means that the parser won't see accurate file paths and positions with included files.
expandIncludes :: FilePath -> Text -> IO Text
expandIncludes dir0 content = mapM (expandLine dir0) (T.lines content) <&> T.unlines
where
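The expandLine helper sits outside this hunk. As an aside, a minimal sketch of how such line-by-line include expansion could look (hypothetical names, no error handling, not hledger's actual code):

import qualified Data.Text as T
import qualified Data.Text.IO as T
import System.FilePath ((</>), takeDirectory)

-- Hypothetical simplified include expansion, for illustration only.
expandIncludesSketch :: FilePath -> T.Text -> IO T.Text
expandIncludesSketch dir content = T.unlines <$> mapM expand (T.lines content)
  where
    expand line = case T.stripPrefix "include " line of
      Just f  -> do
        -- resolve the included path relative to the including file's directory
        let f' = dir </> T.unpack (T.strip f)
        -- recurse, so nested includes are expanded too
        expandIncludesSketch (takeDirectory f') =<< T.readFile f'
      Nothing -> pure line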
@ -315,11 +317,16 @@ type DateFormat = Text
-- interpreted or combined with other matchers.
data MatcherPrefix =
Or -- ^ no prefix
| And -- ^ &
| And -- ^ &&
| Not -- ^ !
| AndNot -- ^ & !
| AndNot -- ^ && !
deriving (Show, Eq)
dbgShowMatcherPrefix Or = ""
dbgShowMatcherPrefix And = "&&"
dbgShowMatcherPrefix Not = "!"
dbgShowMatcherPrefix AndNot = "&& !"
-- | A single test for matching a CSV record, in one way or another.
data Matcher =
RecordMatcher MatcherPrefix Regexp -- ^ match if this regexp matches the overall CSV record
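As an aside (example only, not part of this diff): an unprefixed matcher line in an if block gets the Or prefix, a && line gets And, a ! line gets Not, and a && ! line gets AndNot. Reusing the names defined in this hunk, the new debug renderer shows them as in the comments below:

-- Example only: prefixes and how dbgShowMatcherPrefix renders them.
examplePrefixes :: [(MatcherPrefix, String)]
examplePrefixes =
  [ (Or,     dbgShowMatcherPrefix Or)      -- ""
  , (And,    dbgShowMatcherPrefix And)     -- "&&"
  , (Not,    dbgShowMatcherPrefix Not)     -- "!"
  , (AndNot, dbgShowMatcherPrefix AndNot)  -- "&& !"
  ]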
@ -334,6 +341,11 @@ matcherSetPrefix :: MatcherPrefix -> Matcher -> Matcher
matcherSetPrefix p (RecordMatcher _ r) = RecordMatcher p r
matcherSetPrefix p (FieldMatcher _ f r) = FieldMatcher p f r
dbgShowMatcher (RecordMatcher Or r) = show $ reString r
dbgShowMatcher (RecordMatcher p r) = unwords [dbgShowMatcherPrefix p, show $ reString r]
dbgShowMatcher (FieldMatcher Or f r) = unwords [T.unpack f, show $ reString r]
dbgShowMatcher (FieldMatcher p f r) = unwords [dbgShowMatcherPrefix p, T.unpack f, show $ reString r]
-- | A conditional block: a set of CSV record matchers, and a sequence
-- of rules which will be enabled only if one or more of the matchers
-- succeeds.
@ -346,6 +358,9 @@ data ConditionalBlock = CB {
,cbAssignments :: [(HledgerFieldName, FieldTemplate)]
} deriving (Show, Eq)
dbgShowConditionalBlock :: ConditionalBlock -> String
dbgShowConditionalBlock = unwords . map dbgShowMatcher . cbMatchers
defrules :: CsvRulesParsed
defrules = CsvRules' {
rdirectives=[],
@ -784,15 +799,22 @@ getEffectiveAssignment
getEffectiveAssignment rules record f = lastMay assignments
where
-- all active assignments to field f, in order
assignments = dbg9 "csv assignments" $ toplevelassignments ++ conditionalassignments
assignments = toplevelassignments ++ conditionalassignments
-- all top level field assignments
toplevelassignments = map (Left . snd) $ filter ((==f).fst) $ rassignments rules
-- all conditional blocks assigning to field f and active for the current csv record
conditionalassignments = map Right
$ filter (any (==f) . map fst . cbAssignments)
$ dbg'
$ filter (isBlockActive rules record)
$ (rblocksassigning rules) f
dbg' [] = []
dbg' ms = dbg2Msg (
" for the " ++ T.unpack f ++ " field, these if rules matched:"
++ concatMap (("\n " ++) . dbgShowConditionalBlock) ms
) ms
-- does this conditional block match the current csv record ?
isBlockActive :: CsvRules -> CsvRecord -> ConditionalBlock -> Bool
isBlockActive rules record CB{..} = any (all matcherMatches) $ groupedMatchers cbMatchers
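That is, a conditional block is active when any one alternative (a leading matcher together with the && / && !-prefixed matchers grouped onto it) has all of its matchers succeed. A trimmed-down standalone illustration of that shape (hypothetical code, not hledger's):

-- Each inner list is one alternative: a leading matcher plus the matchers
-- chained onto it with && or && ! (any negation already applied to the Bools).
blockActiveSketch :: [[Bool]] -> Bool
blockActiveSketch = any and

-- e.g. (m1 && m2) || m3:
--   blockActiveSketch [[True, False], [True]]   == True
--   blockActiveSketch [[True, False], [False]]  == False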
@ -812,7 +834,7 @@ isBlockActive rules record CB{..} = any (all matcherMatches) $ groupedMatchers c
--
matcherMatches :: Matcher -> Bool
matcherMatches = \case
RecordMatcher prefix pat -> maybeNegate prefix $ match pat $ T.intercalate "," record
RecordMatcher prefix pat -> maybeNegate prefix $ match pat $ recordAsApproximateText record
FieldMatcher prefix csvfieldref pat -> maybeNegate prefix $ match pat $
fromMaybe "" $ replaceCsvFieldReference rules record csvfieldref
-- (warn msg "") where msg = "if "<>T.unpack csvfieldref<>": this should be a name declared with 'fields', or %NUM"
@ -835,6 +857,13 @@ isBlockActive rules record CB{..} = any (all matcherMatches) $ groupedMatchers c
(andandnots, rest) = span (\a -> matcherPrefix a `elem` [And, AndNot]) ms
ands = [matcherSetPrefix p a | a <- andandnots, let p = if matcherPrefix a == AndNot then Not else And]
-- | Convert a CSV record to text, for whole-record matching.
-- This will be only an approximation of the original record;
-- values will always be comma-separated,
-- and any enclosing quotes and whitespace outside those quotes will be removed.
recordAsApproximateText :: CsvRecord -> Text
recordAsApproximateText = T.intercalate ","
-- | Render a field assignment's template, possibly interpolating referenced
-- CSV field values or match groups. Outer whitespace is removed from interpolated values.
renderTemplate :: CsvRules -> CsvRecord -> FieldTemplate -> Text
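For example (illustrative values only, assuming OverloadedStrings as used elsewhere in this module): a record parsed from the CSV line "2022-01-03","coffee shop","-5.00" is seen by whole-record matchers as plain comma-joined text, which is also what the updated error messages in the test files further down now show:

-- Example only.
exampleRecordText :: Text
exampleRecordText = recordAsApproximateText ["2022-01-03", "coffee shop", "-5.00"]
-- == "2022-01-03,coffee shop,-5.00"   (the original enclosing quotes are gone)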
@ -877,7 +906,7 @@ regexMatchValue rules record sgroup = let
getMatchGroups :: CsvRules -> CsvRecord -> Matcher -> [Text]
getMatchGroups _ record (RecordMatcher _ regex) =
regexMatchTextGroups regex $ T.intercalate "," record -- see caveats in matcherMatches
regexMatchTextGroups regex $ recordAsApproximateText record -- groups might be wrong
getMatchGroups rules record (FieldMatcher _ fieldref regex) =
regexMatchTextGroups regex $ fromMaybe "" $ replaceCsvFieldReference rules record fieldref
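As an aside on how templates use these groups (example only; it assumes the toRegexCI' helper from Hledger.Utils.Regex and OverloadedStrings, neither of which is part of this diff): a template can interpolate CSV fields as %N or %name and regexp match groups as \1..\9. Roughly:

-- Example only, with an assumed regexp-building helper.
exampleGroups :: [Text]
exampleGroups = regexMatchTextGroups (toRegexCI' "WITHDRAWAL ([0-9]+)") "ATM WITHDRAWAL 123"
-- expected: ["123"]; a template such as "withdrawal #\1 on %1" would then
-- render roughly as "withdrawal #123 on 2022-01-03" for the record
-- ["2022-01-03","ATM WITHDRAWAL 123"].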
@ -1105,8 +1134,13 @@ validateCsv rs@(_first:_) =
--- ** converting csv records to transactions
transactionFromCsvRecord :: Bool -> Maybe TimeZone -> TimeZone -> SourcePos -> CsvRules -> CsvRecord -> Transaction
transactionFromCsvRecord timesarezoned mtzin tzout sourcepos rules record = t
transactionFromCsvRecord timesarezoned mtzin tzout sourcepos rules record =
-- log the record and all the transaction fields from this record
-- XXX avoid possibly-pessimising deepseq if not needed for debug output ?
dbg2Msg (T.unpack $ showRecord record) $ deepseq t
t
where
----------------------------------------------------------------------
-- 1. Define some helpers:
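One way the XXX above could be addressed, sketched only (hypothetical helper, not part of this diff or of hledger's API): force the value only when the debug level makes the log message visible.

import Control.DeepSeq (NFData, deepseq)

-- Fully evaluate a value before returning it, but only when debug output at
-- this level is enabled, so the embedded lazy debug-logging thunks fire at a
-- predictable point without pessimising normal runs.
forceAtDebugLevel :: NFData a => Int -> a -> a
forceAtDebugLevel lvl x
  | debugLevel >= lvl = x `deepseq` x
  | otherwise         = x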
@ -1375,10 +1409,6 @@ showRules rules record = T.unlines $ catMaybes
[ (("the "<>fld<>" rule is: ")<>) <$>
hledgerField rules record fld | fld <- journalfieldnames ]
-- | Show a (approximate) recreation of the original CSV record.
showRecord :: CsvRecord -> Text
showRecord r = "CSV record: "<>T.intercalate "," (map (wrap "\"" "\"") r)
-- XXX unify these ^v
-- | Almost but not quite the same as parseAmount.
@ -1402,6 +1432,10 @@ parseBalanceAmount rules record currency n s =
,"the parse error is: "<> T.pack (customErrorBundlePretty e)
]
-- | Show the approximation of the original CSV record, labelled, for debug output.
showRecord :: CsvRecord -> Text
showRecord = ("record: "<>) . recordAsApproximateText
-- Read a valid decimal mark from the decimal-mark rule, if any.
-- If the rule is present with an invalid argument, raise an error.
parseDecimalMark :: CsvRules -> Maybe DecimalMark

View File

@ -127,9 +127,6 @@ It's not yet possible to select debug output by topic; that would be useful.
module Hledger.Utils.Debug (
-- * Debug level
-- | This is parsed from a command line --debug N option, or --debug meaning 1.
-- The command line is read (once) by unsafePerformIO, allowing this to be used
-- easily anywhere in your program.
debugLevel
-- * Trace/log a string
@ -248,9 +245,11 @@ progName =
then reverse $ drop 4 $ reverse modifiedProgName
else modifiedProgName
-- | The programs debug output verbosity. The default is 0 meaning no debug output.
-- The @--debug@ command line flag sets it to 1, or @--debug=N@ sets it to
-- a higher value (the = is required). Uses unsafePerformIO.
-- | The program's debug output verbosity, from 0 to 9.
-- The default is 0 meaning no debug output.
-- This can be overridden by running the program with a --debug [1-9] command line option;
-- a --debug flag with no value means 1.
-- Uses unsafePerformIO to read the command line.
-- When running in GHCI, changing this requires reloading this module.
debugLevel :: Int
debugLevel = case dropWhile (/="--debug") progArgs of
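The rest of this definition lies outside the hunk; for orientation, a minimal self-contained sketch of the kind of parsing the new comment describes (hypothetical code, not hledger's actual definition):

import Data.Maybe (fromMaybe)
import Text.Read (readMaybe)

-- Hypothetical pure variant taking the argument list explicitly.
debugLevelSketch :: [String] -> Int
debugLevelSketch args = case dropWhile (/= "--debug") args of
  ["--debug"]       -> 1                          -- bare --debug means level 1
  "--debug" : n : _ -> fromMaybe 1 (readMaybe n)  -- --debug N sets level N
  _                 -> 0                          -- no flag: no debug output

The real definition instead reads progArgs, captured once via unsafePerformIO, as described in the comment above.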

View File

@ -1,6 +1,6 @@
$$$ hledger check -f csvamountonenonzero.csv
>>>2 /Error: in CSV rules:
While processing CSV record: "2022-01-03","1","2"
While processing record: 2022-01-03,1,2
while calculating amount for posting 1
rule "amount-in %2" assigned value "1"
rule "amount-out %3" assigned value "2"

View File

@ -1,6 +1,6 @@
$$$ hledger check -f csvamountparse.csv
>>>2 /Error: could not parse "badamount" as an amount
CSV record: "2022-01-03","badamount"
record: 2022-01-03,badamount
the amount rule is: %2
the date rule is: %1

View File

@ -1,6 +1,6 @@
$$$ hledger check -f csvbalanceparse.csv
>>>2 /Error: could not parse "badbalance" as balance1 amount
CSV record: "2022-01-03","badbalance"
record: 2022-01-03,badbalance
the balance rule is: %2
the date rule is: %1

View File

@ -1,6 +1,6 @@
$$$ hledger check -f csvbalancetypeparse.csv
>>>2 /Error: balance-type "badtype" is invalid. Use =, ==, =\* or ==\*.
CSV record: "2022-01-01","1"
record: 2022-01-01,1
the balance rule is: %2
the date rule is: %1
/

View File

@ -1,6 +1,6 @@
$$$ hledger print -f csvdateformat.csv
>>>2 /Error: could not parse "a" as a date using date format "YYYY\/M\/D", "YYYY-M-D" or "YYYY.M.D"
CSV record: "a","b"
record: a,b
the date rule is: %1
the date-format is: unspecified
you may need to change your date rule, add a date-format rule, or change your skip rule

View File

@ -1,6 +1,6 @@
$$$ hledger check -f csvdateparse.csv
>>>2 /Error: could not parse "baddate" as a date using date format "%Y-%m-%d"
CSV record: "baddate","b"
record: baddate,b
the date rule is: %1
the date-format is: %Y-%m-%d
you may need to change your date rule, change your date-format rule, or change your skip rule