From e360e50497f4c148a72756e2c72f987aca0b29cd Mon Sep 17 00:00:00 2001 From: Simon Michael Date: Thu, 22 May 2025 16:48:22 -1000 Subject: [PATCH] imp:csv: more --debug=2 output for if rules Also, in debug output show records more like what matchers are seeing, ie with quotes removed. --- hledger-lib/Hledger/Read/RulesReader.hs | 56 ++++++++++++++++---- hledger-lib/Hledger/Utils/Debug.hs | 11 ++-- hledger/test/errors/csvamountonenonzero.test | 2 +- hledger/test/errors/csvamountparse.test | 2 +- hledger/test/errors/csvbalanceparse.test | 2 +- hledger/test/errors/csvbalancetypeparse.test | 2 +- hledger/test/errors/csvdateformat.test | 2 +- hledger/test/errors/csvdateparse.test | 2 +- 8 files changed, 56 insertions(+), 23 deletions(-) diff --git a/hledger-lib/Hledger/Read/RulesReader.hs b/hledger-lib/Hledger/Read/RulesReader.hs index d3748debe..d4c99b5e9 100644 --- a/hledger-lib/Hledger/Read/RulesReader.hs +++ b/hledger-lib/Hledger/Read/RulesReader.hs @@ -86,6 +86,7 @@ import Hledger.Read.Common (aliasesFromOpts, Reader(..), InputOpts(..), amountp, import Hledger.Write.Csv import System.Directory (doesFileExist, getHomeDirectory) import Data.Either (fromRight) +import Control.DeepSeq (deepseq) --- ** doctest setup -- $setup @@ -178,7 +179,8 @@ readRulesFile f = -- | Inline all files referenced by include directives in this hledger CSV rules text, recursively. -- Included file paths may be relative to the directory of the provided file path. --- This is done as a pre-parse step to simplify the CSV rules parser. +-- Unlike with journal files, this is done as a pre-parse step to simplify the CSV rules parser. +-- Unfortunately this means that the parser won't see accurate file paths and positions with included files. expandIncludes :: FilePath -> Text -> IO Text expandIncludes dir0 content = mapM (expandLine dir0) (T.lines content) <&> T.unlines where @@ -315,11 +317,16 @@ type DateFormat = Text -- interpreted or combined with other matchers. 
data MatcherPrefix = Or -- ^ no prefix - | And -- ^ & + | And -- ^ && | Not -- ^ ! - | AndNot -- ^ & ! + | AndNot -- ^ && ! deriving (Show, Eq) +dbgShowMatcherPrefix Or = "" +dbgShowMatcherPrefix And = "&&" +dbgShowMatcherPrefix Not = "!" +dbgShowMatcherPrefix AndNot = "&& !" + -- | A single test for matching a CSV record, in one way or another. data Matcher = RecordMatcher MatcherPrefix Regexp -- ^ match if this regexp matches the overall CSV record @@ -334,6 +341,11 @@ matcherSetPrefix :: MatcherPrefix -> Matcher -> Matcher matcherSetPrefix p (RecordMatcher _ r) = RecordMatcher p r matcherSetPrefix p (FieldMatcher _ f r) = FieldMatcher p f r +dbgShowMatcher (RecordMatcher Or r) = show $ reString r +dbgShowMatcher (RecordMatcher p r) = unwords [dbgShowMatcherPrefix p, show $ reString r] +dbgShowMatcher (FieldMatcher Or f r) = unwords [T.unpack f, show $ reString r] +dbgShowMatcher (FieldMatcher p f r) = unwords [dbgShowMatcherPrefix p, T.unpack f, show $ reString r] + -- | A conditional block: a set of CSV record matchers, and a sequence -- of rules which will be enabled only if one or more of the matchers -- succeeds. @@ -346,6 +358,9 @@ data ConditionalBlock = CB { ,cbAssignments :: [(HledgerFieldName, FieldTemplate)] } deriving (Show, Eq) +dbgShowConditionalBlock :: ConditionalBlock -> String +dbgShowConditionalBlock = unwords . map dbgShowMatcher . cbMatchers + defrules :: CsvRulesParsed defrules = CsvRules' { rdirectives=[], @@ -784,15 +799,22 @@ getEffectiveAssignment getEffectiveAssignment rules record f = lastMay assignments where -- all active assignments to field f, in order - assignments = dbg9 "csv assignments" $ toplevelassignments ++ conditionalassignments + assignments = toplevelassignments ++ conditionalassignments -- all top level field assignments toplevelassignments = map (Left . 
snd) $ filter ((==f).fst) $ rassignments rules -- all conditional blocks assigning to field f and active for the current csv record conditionalassignments = map Right $ filter (any (==f) . map fst . cbAssignments) + $ dbg' $ filter (isBlockActive rules record) $ (rblocksassigning rules) f + dbg' [] = [] + dbg' ms = dbg2Msg ( + " for the " ++ T.unpack f ++ " field, these if rules matched:" + ++ concatMap (("\n " ++) . dbgShowConditionalBlock) ms + ) ms + -- does this conditional block match the current csv record ? isBlockActive :: CsvRules -> CsvRecord -> ConditionalBlock -> Bool isBlockActive rules record CB{..} = any (all matcherMatches) $ groupedMatchers cbMatchers @@ -812,7 +834,7 @@ isBlockActive rules record CB{..} = any (all matcherMatches) $ groupedMatchers c -- matcherMatches :: Matcher -> Bool matcherMatches = \case - RecordMatcher prefix pat -> maybeNegate prefix $ match pat $ T.intercalate "," record + RecordMatcher prefix pat -> maybeNegate prefix $ match pat $ recordAsApproximateText record FieldMatcher prefix csvfieldref pat -> maybeNegate prefix $ match pat $ fromMaybe "" $ replaceCsvFieldReference rules record csvfieldref -- (warn msg "") where msg = "if "<>T.unpack csvfieldref<>": this should be a name declared with 'fields', or %NUM" @@ -835,6 +857,13 @@ isBlockActive rules record CB{..} = any (all matcherMatches) $ groupedMatchers c (andandnots, rest) = span (\a -> matcherPrefix a `elem` [And, AndNot]) ms ands = [matcherSetPrefix p a | a <- andandnots, let p = if matcherPrefix a == AndNot then Not else And] +-- | Convert a CSV record to text, for whole-record matching. +-- This will be only an approximation of the original record; +-- values will always be comma-separated, +-- and any enclosing quotes and whitespace outside those quotes will be removed. 
+recordAsApproximateText :: CsvRecord -> Text +recordAsApproximateText = T.intercalate "," + -- | Render a field assignment's template, possibly interpolating referenced -- CSV field values or match groups. Outer whitespace is removed from interpolated values. renderTemplate :: CsvRules -> CsvRecord -> FieldTemplate -> Text @@ -877,7 +906,7 @@ regexMatchValue rules record sgroup = let getMatchGroups :: CsvRules -> CsvRecord -> Matcher -> [Text] getMatchGroups _ record (RecordMatcher _ regex) = - regexMatchTextGroups regex $ T.intercalate "," record -- see caveats in matcherMatches + regexMatchTextGroups regex $ recordAsApproximateText record -- groups might be wrong getMatchGroups rules record (FieldMatcher _ fieldref regex) = regexMatchTextGroups regex $ fromMaybe "" $ replaceCsvFieldReference rules record fieldref @@ -1105,8 +1134,13 @@ validateCsv rs@(_first:_) = --- ** converting csv records to transactions transactionFromCsvRecord :: Bool -> Maybe TimeZone -> TimeZone -> SourcePos -> CsvRules -> CsvRecord -> Transaction -transactionFromCsvRecord timesarezoned mtzin tzout sourcepos rules record = t +transactionFromCsvRecord timesarezoned mtzin tzout sourcepos rules record = + -- log the record and all the transaction fields from this record + -- XXX avoid possibly-pessimising deepseq if not needed for debug output ? + dbg2Msg (T.unpack $ showRecord record) $ deepseq t + t where + ---------------------------------------------------------------------- -- 1. Define some helpers: @@ -1375,10 +1409,6 @@ showRules rules record = T.unlines $ catMaybes [ (("the "<>fld<>" rule is: ")<>) <$> hledgerField rules record fld | fld <- journalfieldnames ] --- | Show a (approximate) recreation of the original CSV record. -showRecord :: CsvRecord -> Text -showRecord r = "CSV record: "<>T.intercalate "," (map (wrap "\"" "\"") r) - -- XXX unify these ^v -- | Almost but not quite the same as parseAmount. 
@@ -1402,6 +1432,10 @@ parseBalanceAmount rules record currency n s = ,"the parse error is: "<> T.pack (customErrorBundlePretty e) ] +-- | Show the approximation of the original CSV record, labelled, for debug output. +showRecord :: CsvRecord -> Text +showRecord = ("record: "<>) . recordAsApproximateText + -- Read a valid decimal mark from the decimal-mark rule, if any. -- If the rule is present with an invalid argument, raise an error. parseDecimalMark :: CsvRules -> Maybe DecimalMark diff --git a/hledger-lib/Hledger/Utils/Debug.hs b/hledger-lib/Hledger/Utils/Debug.hs index 2559d3a51..9efb0bb61 100644 --- a/hledger-lib/Hledger/Utils/Debug.hs +++ b/hledger-lib/Hledger/Utils/Debug.hs @@ -127,9 +127,6 @@ It's not yet possible to select debug output by topic; that would be useful. module Hledger.Utils.Debug ( -- * Debug level - -- | This is parsed from a command line --debug N option, or --debug meaning 1. - -- The command line is read (once) by unsafePerformIO, allowing this to be used - -- easily anywhere in your program. debugLevel -- * Trace/log a string @@ -248,9 +245,11 @@ progName = then reverse $ drop 4 $ reverse modifiedProgName else modifiedProgName --- | The programs debug output verbosity. The default is 0 meaning no debug output. --- The @--debug@ command line flag sets it to 1, or @--debug=N@ sets it to --- a higher value (the = is required). Uses unsafePerformIO. +-- | The program's debug output verbosity, from 0 to 9. +-- The default is 0 meaning no debug output. +-- This can be overridden by running the program with a --debug [1-9] command line option; +-- a --debug flag with no value means 1. +-- Uses unsafePerformIO to read the command line. -- When running in GHCI, changing this requires reloading this module. 
debugLevel :: Int debugLevel = case dropWhile (/="--debug") progArgs of diff --git a/hledger/test/errors/csvamountonenonzero.test b/hledger/test/errors/csvamountonenonzero.test index 567b39896..7647280e2 100644 --- a/hledger/test/errors/csvamountonenonzero.test +++ b/hledger/test/errors/csvamountonenonzero.test @@ -1,6 +1,6 @@ $$$ hledger check -f csvamountonenonzero.csv >>>2 /Error: in CSV rules: -While processing CSV record: "2022-01-03","1","2" +While processing record: 2022-01-03,1,2 while calculating amount for posting 1 rule "amount-in %2" assigned value "1" rule "amount-out %3" assigned value "2" diff --git a/hledger/test/errors/csvamountparse.test b/hledger/test/errors/csvamountparse.test index 0712c38e3..f94fa4810 100644 --- a/hledger/test/errors/csvamountparse.test +++ b/hledger/test/errors/csvamountparse.test @@ -1,6 +1,6 @@ $$$ hledger check -f csvamountparse.csv >>>2 /Error: could not parse "badamount" as an amount -CSV record: "2022-01-03","badamount" +record: 2022-01-03,badamount the amount rule is: %2 the date rule is: %1 diff --git a/hledger/test/errors/csvbalanceparse.test b/hledger/test/errors/csvbalanceparse.test index efbd2fec0..e46e19d17 100644 --- a/hledger/test/errors/csvbalanceparse.test +++ b/hledger/test/errors/csvbalanceparse.test @@ -1,6 +1,6 @@ $$$ hledger check -f csvbalanceparse.csv >>>2 /Error: could not parse "badbalance" as balance1 amount -CSV record: "2022-01-03","badbalance" +record: 2022-01-03,badbalance the balance rule is: %2 the date rule is: %1 diff --git a/hledger/test/errors/csvbalancetypeparse.test b/hledger/test/errors/csvbalancetypeparse.test index 99ec53f1b..715f4e615 100644 --- a/hledger/test/errors/csvbalancetypeparse.test +++ b/hledger/test/errors/csvbalancetypeparse.test @@ -1,6 +1,6 @@ $$$ hledger check -f csvbalancetypeparse.csv >>>2 /Error: balance-type "badtype" is invalid. Use =, ==, =\* or ==\*. 
-CSV record: "2022-01-01","1" +record: 2022-01-01,1 the balance rule is: %2 the date rule is: %1 / diff --git a/hledger/test/errors/csvdateformat.test b/hledger/test/errors/csvdateformat.test index 6d6c2e419..10fec905b 100644 --- a/hledger/test/errors/csvdateformat.test +++ b/hledger/test/errors/csvdateformat.test @@ -1,6 +1,6 @@ $$$ hledger print -f csvdateformat.csv >>>2 /Error: could not parse "a" as a date using date format "YYYY\/M\/D", "YYYY-M-D" or "YYYY.M.D" -CSV record: "a","b" +record: a,b the date rule is: %1 the date-format is: unspecified you may need to change your date rule, add a date-format rule, or change your skip rule diff --git a/hledger/test/errors/csvdateparse.test b/hledger/test/errors/csvdateparse.test index 5642537bf..8307dd355 100644 --- a/hledger/test/errors/csvdateparse.test +++ b/hledger/test/errors/csvdateparse.test @@ -1,6 +1,6 @@ $$$ hledger check -f csvdateparse.csv >>>2 /Error: could not parse "baddate" as a date using date format "%Y-%m-%d" -CSV record: "baddate","b" +record: baddate,b the date rule is: %1 the date-format is: %Y-%m-%d you may need to change your date rule, change your date-format rule, or change your skip rule