imp:csv: more --debug=2 output for if rules

Also, in debug output, show records more like what the matchers see,
i.e. with enclosing quotes removed.
Simon Michael 2025-05-22 16:48:22 -10:00
parent 9474d808dc
commit e360e50497
8 changed files with 56 additions and 23 deletions

View File

@ -86,6 +86,7 @@ import Hledger.Read.Common (aliasesFromOpts, Reader(..), InputOpts(..), amountp,
import Hledger.Write.Csv
import System.Directory (doesFileExist, getHomeDirectory)
import Data.Either (fromRight)
import Control.DeepSeq (deepseq)
--- ** doctest setup
-- $setup
@ -178,7 +179,8 @@ readRulesFile f =
-- | Inline all files referenced by include directives in this hledger CSV rules text, recursively.
-- Included file paths may be relative to the directory of the provided file path.
-- This is done as a pre-parse step to simplify the CSV rules parser.
-- Unlike with journal files, this is done as a pre-parse step to simplify the CSV rules parser.
-- Unfortunately this means that the parser won't see accurate file paths and positions with included files.
expandIncludes :: FilePath -> Text -> IO Text
expandIncludes dir0 content = mapM (expandLine dir0) (T.lines content) <&> T.unlines
where
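The expandLine helper sits outside this hunk. As an aside, a minimal sketch of how such line-by-line include expansion could look (hypothetical names, no error handling, not hledger's actual code):

import qualified Data.Text as T
import qualified Data.Text.IO as T
import System.FilePath ((</>), takeDirectory)

-- Hypothetical simplified include expansion, for illustration only.
expandIncludesSketch :: FilePath -> T.Text -> IO T.Text
expandIncludesSketch dir content = T.unlines <$> mapM expand (T.lines content)
  where
    expand line = case T.stripPrefix "include " line of
      Just f  -> do
        -- resolve the included path relative to the including file's directory
        let f' = dir </> T.unpack (T.strip f)
        -- recurse, so nested includes are expanded too
        expandIncludesSketch (takeDirectory f') =<< T.readFile f'
      Nothing -> pure line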
@ -315,11 +317,16 @@ type DateFormat = Text
-- interpreted or combined with other matchers.
data MatcherPrefix =
Or -- ^ no prefix
| And -- ^ &
| And -- ^ &&
| Not -- ^ !
| AndNot -- ^ & !
| AndNot -- ^ && !
deriving (Show, Eq)
dbgShowMatcherPrefix Or = ""
dbgShowMatcherPrefix And = "&&"
dbgShowMatcherPrefix Not = "!"
dbgShowMatcherPrefix AndNot = "&& !"
-- | A single test for matching a CSV record, in one way or another.
data Matcher =
RecordMatcher MatcherPrefix Regexp -- ^ match if this regexp matches the overall CSV record
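As an aside (example only, not part of this diff): an unprefixed matcher line in an if block gets the Or prefix, a && line gets And, a ! line gets Not, and a && ! line gets AndNot. Reusing the names defined in this hunk, the new debug renderer shows them as in the comments below:

-- Example only: prefixes and how dbgShowMatcherPrefix renders them.
examplePrefixes :: [(MatcherPrefix, String)]
examplePrefixes =
  [ (Or,     dbgShowMatcherPrefix Or)      -- ""
  , (And,    dbgShowMatcherPrefix And)     -- "&&"
  , (Not,    dbgShowMatcherPrefix Not)     -- "!"
  , (AndNot, dbgShowMatcherPrefix AndNot)  -- "&& !"
  ]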
@ -334,6 +341,11 @@ matcherSetPrefix :: MatcherPrefix -> Matcher -> Matcher
matcherSetPrefix p (RecordMatcher _ r) = RecordMatcher p r
matcherSetPrefix p (FieldMatcher _ f r) = FieldMatcher p f r
dbgShowMatcher (RecordMatcher Or r) = show $ reString r
dbgShowMatcher (RecordMatcher p r) = unwords [dbgShowMatcherPrefix p, show $ reString r]
dbgShowMatcher (FieldMatcher Or f r) = unwords [T.unpack f, show $ reString r]
dbgShowMatcher (FieldMatcher p f r) = unwords [dbgShowMatcherPrefix p, T.unpack f, show $ reString r]
-- | A conditional block: a set of CSV record matchers, and a sequence
-- of rules which will be enabled only if one or more of the matchers
-- succeeds.
@ -346,6 +358,9 @@ data ConditionalBlock = CB {
,cbAssignments :: [(HledgerFieldName, FieldTemplate)]
} deriving (Show, Eq)
dbgShowConditionalBlock :: ConditionalBlock -> String
dbgShowConditionalBlock = unwords . map dbgShowMatcher . cbMatchers
defrules :: CsvRulesParsed
defrules = CsvRules' {
rdirectives=[],
@ -784,15 +799,22 @@ getEffectiveAssignment
getEffectiveAssignment rules record f = lastMay assignments
where
-- all active assignments to field f, in order
assignments = dbg9 "csv assignments" $ toplevelassignments ++ conditionalassignments
assignments = toplevelassignments ++ conditionalassignments
-- all top level field assignments
toplevelassignments = map (Left . snd) $ filter ((==f).fst) $ rassignments rules
-- all conditional blocks assigning to field f and active for the current csv record
conditionalassignments = map Right
$ filter (any (==f) . map fst . cbAssignments)
$ dbg'
$ filter (isBlockActive rules record)
$ (rblocksassigning rules) f
dbg' [] = []
dbg' ms = dbg2Msg (
" for the " ++ T.unpack f ++ " field, these if rules matched:"
++ concatMap (("\n " ++) . dbgShowConditionalBlock) ms
) ms
-- does this conditional block match the current csv record ?
isBlockActive :: CsvRules -> CsvRecord -> ConditionalBlock -> Bool
isBlockActive rules record CB{..} = any (all matcherMatches) $ groupedMatchers cbMatchers
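That is, a conditional block is active when any one alternative (a leading matcher together with the && / && !-prefixed matchers grouped onto it) has all of its matchers succeed. A trimmed-down standalone illustration of that shape (hypothetical code, not hledger's):

-- Each inner list is one alternative: a leading matcher plus the matchers
-- chained onto it with && or && ! (any negation already applied to the Bools).
blockActiveSketch :: [[Bool]] -> Bool
blockActiveSketch = any and

-- e.g. (m1 && m2) || m3:
--   blockActiveSketch [[True, False], [True]]   == True
--   blockActiveSketch [[True, False], [False]]  == False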
@ -812,7 +834,7 @@ isBlockActive rules record CB{..} = any (all matcherMatches) $ groupedMatchers c
--
matcherMatches :: Matcher -> Bool
matcherMatches = \case
RecordMatcher prefix pat -> maybeNegate prefix $ match pat $ T.intercalate "," record
RecordMatcher prefix pat -> maybeNegate prefix $ match pat $ recordAsApproximateText record
FieldMatcher prefix csvfieldref pat -> maybeNegate prefix $ match pat $
fromMaybe "" $ replaceCsvFieldReference rules record csvfieldref
-- (warn msg "") where msg = "if "<>T.unpack csvfieldref<>": this should be a name declared with 'fields', or %NUM"
@ -835,6 +857,13 @@ isBlockActive rules record CB{..} = any (all matcherMatches) $ groupedMatchers c
(andandnots, rest) = span (\a -> matcherPrefix a `elem` [And, AndNot]) ms
ands = [matcherSetPrefix p a | a <- andandnots, let p = if matcherPrefix a == AndNot then Not else And]
-- | Convert a CSV record to text, for whole-record matching.
-- This will be only an approximation of the original record;
-- values will always be comma-separated,
-- and any enclosing quotes and whitespace outside those quotes will be removed.
recordAsApproximateText :: CsvRecord -> Text
recordAsApproximateText = T.intercalate ","
-- | Render a field assignment's template, possibly interpolating referenced
-- CSV field values or match groups. Outer whitespace is removed from interpolated values.
renderTemplate :: CsvRules -> CsvRecord -> FieldTemplate -> Text
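For example (illustrative values only, assuming OverloadedStrings as used elsewhere in this module): a record parsed from the CSV line "2022-01-03","coffee shop","-5.00" is seen by whole-record matchers as plain comma-joined text, which is also what the updated error messages in the test files further down now show:

-- Example only.
exampleRecordText :: Text
exampleRecordText = recordAsApproximateText ["2022-01-03", "coffee shop", "-5.00"]
-- == "2022-01-03,coffee shop,-5.00"   (the original enclosing quotes are gone)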
@ -877,7 +906,7 @@ regexMatchValue rules record sgroup = let
getMatchGroups :: CsvRules -> CsvRecord -> Matcher -> [Text]
getMatchGroups _ record (RecordMatcher _ regex) =
regexMatchTextGroups regex $ T.intercalate "," record -- see caveats in matcherMatches
regexMatchTextGroups regex $ recordAsApproximateText record -- groups might be wrong
getMatchGroups rules record (FieldMatcher _ fieldref regex) =
regexMatchTextGroups regex $ fromMaybe "" $ replaceCsvFieldReference rules record fieldref
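As an aside on how templates use these groups (example only; it assumes the toRegexCI' helper from Hledger.Utils.Regex and OverloadedStrings, neither of which is part of this diff): a template can interpolate CSV fields as %N or %name and regexp match groups as \1..\9. Roughly:

-- Example only, with an assumed regexp-building helper.
exampleGroups :: [Text]
exampleGroups = regexMatchTextGroups (toRegexCI' "WITHDRAWAL ([0-9]+)") "ATM WITHDRAWAL 123"
-- expected: ["123"]; a template such as "withdrawal #\1 on %1" would then
-- render roughly as "withdrawal #123 on 2022-01-03" for the record
-- ["2022-01-03","ATM WITHDRAWAL 123"].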
@ -1105,8 +1134,13 @@ validateCsv rs@(_first:_) =
--- ** converting csv records to transactions
transactionFromCsvRecord :: Bool -> Maybe TimeZone -> TimeZone -> SourcePos -> CsvRules -> CsvRecord -> Transaction
transactionFromCsvRecord timesarezoned mtzin tzout sourcepos rules record = t
transactionFromCsvRecord timesarezoned mtzin tzout sourcepos rules record =
-- log the record and all the transaction fields from this record
-- XXX avoid possibly-pessimising deepseq if not needed for debug output ?
dbg2Msg (T.unpack $ showRecord record) $ deepseq t
t
where
----------------------------------------------------------------------
-- 1. Define some helpers:
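One way the XXX above could be addressed, sketched only (hypothetical helper, not part of this diff or of hledger's API): force the value only when the debug level makes the log message visible.

import Control.DeepSeq (NFData, deepseq)

-- Fully evaluate a value before returning it, but only when debug output at
-- this level is enabled, so the embedded lazy debug-logging thunks fire at a
-- predictable point without pessimising normal runs.
forceAtDebugLevel :: NFData a => Int -> a -> a
forceAtDebugLevel lvl x
  | debugLevel >= lvl = x `deepseq` x
  | otherwise         = x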
@ -1375,10 +1409,6 @@ showRules rules record = T.unlines $ catMaybes
[ (("the "<>fld<>" rule is: ")<>) <$>
hledgerField rules record fld | fld <- journalfieldnames ]
-- | Show a (approximate) recreation of the original CSV record.
showRecord :: CsvRecord -> Text
showRecord r = "CSV record: "<>T.intercalate "," (map (wrap "\"" "\"") r)
-- XXX unify these ^v
-- | Almost but not quite the same as parseAmount.
@ -1402,6 +1432,10 @@ parseBalanceAmount rules record currency n s =
,"the parse error is: "<> T.pack (customErrorBundlePretty e)
]
-- | Show the approximation of the original CSV record, labelled, for debug output.
showRecord :: CsvRecord -> Text
showRecord = ("record: "<>) . recordAsApproximateText
-- Read a valid decimal mark from the decimal-mark rule, if any.
-- If the rule is present with an invalid argument, raise an error.
parseDecimalMark :: CsvRules -> Maybe DecimalMark

View File

@ -127,9 +127,6 @@ It's not yet possible to select debug output by topic; that would be useful.
module Hledger.Utils.Debug (
-- * Debug level
-- | This is parsed from a command line --debug N option, or --debug meaning 1.
-- The command line is read (once) by unsafePerformIO, allowing this to be used
-- easily anywhere in your program.
debugLevel
-- * Trace/log a string
@ -248,9 +245,11 @@ progName =
then reverse $ drop 4 $ reverse modifiedProgName
else modifiedProgName
-- | The programs debug output verbosity. The default is 0 meaning no debug output.
-- The @--debug@ command line flag sets it to 1, or @--debug=N@ sets it to
-- a higher value (the = is required). Uses unsafePerformIO.
-- | The program's debug output verbosity, from 0 to 9.
-- The default is 0 meaning no debug output.
-- This can be overridden by running the program with a --debug [1-9] command line option;
-- a --debug flag with no value means 1.
-- Uses unsafePerformIO to read the command line.
-- When running in GHCI, changing this requires reloading this module.
debugLevel :: Int
debugLevel = case dropWhile (/="--debug") progArgs of
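The rest of this definition lies outside the hunk; for orientation, a minimal self-contained sketch of the kind of parsing the new comment describes (hypothetical code, not hledger's actual definition):

import Data.Maybe (fromMaybe)
import Text.Read (readMaybe)

-- Hypothetical pure variant taking the argument list explicitly.
debugLevelSketch :: [String] -> Int
debugLevelSketch args = case dropWhile (/= "--debug") args of
  ["--debug"]       -> 1                          -- bare --debug means level 1
  "--debug" : n : _ -> fromMaybe 1 (readMaybe n)  -- --debug N sets level N
  _                 -> 0                          -- no flag: no debug output

The real definition instead reads progArgs, captured once via unsafePerformIO, as described in the comment above.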

View File

@ -1,6 +1,6 @@
$$$ hledger check -f csvamountonenonzero.csv
>>>2 /Error: in CSV rules:
While processing CSV record: "2022-01-03","1","2"
While processing record: 2022-01-03,1,2
while calculating amount for posting 1
rule "amount-in %2" assigned value "1"
rule "amount-out %3" assigned value "2"

View File

@ -1,6 +1,6 @@
$$$ hledger check -f csvamountparse.csv
>>>2 /Error: could not parse "badamount" as an amount
CSV record: "2022-01-03","badamount"
record: 2022-01-03,badamount
the amount rule is: %2
the date rule is: %1

View File

@ -1,6 +1,6 @@
$$$ hledger check -f csvbalanceparse.csv
>>>2 /Error: could not parse "badbalance" as balance1 amount
CSV record: "2022-01-03","badbalance"
record: 2022-01-03,badbalance
the balance rule is: %2
the date rule is: %1

View File

@ -1,6 +1,6 @@
$$$ hledger check -f csvbalancetypeparse.csv
>>>2 /Error: balance-type "badtype" is invalid. Use =, ==, =\* or ==\*.
CSV record: "2022-01-01","1"
record: 2022-01-01,1
the balance rule is: %2
the date rule is: %1
/

View File

@ -1,6 +1,6 @@
$$$ hledger print -f csvdateformat.csv
>>>2 /Error: could not parse "a" as a date using date format "YYYY\/M\/D", "YYYY-M-D" or "YYYY.M.D"
CSV record: "a","b"
record: a,b
the date rule is: %1
the date-format is: unspecified
you may need to change your date rule, add a date-format rule, or change your skip rule

View File

@ -1,6 +1,6 @@
$$$ hledger check -f csvdateparse.csv
>>>2 /Error: could not parse "baddate" as a date using date format "%Y-%m-%d"
CSV record: "baddate","b"
record: baddate,b
the date rule is: %1
the date-format is: %Y-%m-%d
you may need to change your date rule, change your date-format rule, or change your skip rule