lib: condition tables in csv rules + tests
This commit is contained in:
		
							parent
							
								
									a7bb6b9207
								
							
						
					
					
						commit
						834e9ec104
					
				| @ -42,16 +42,16 @@ where | ||||
| --- ** imports | ||||
| import Prelude () | ||||
| import "base-compat-batteries" Prelude.Compat hiding (fail) | ||||
| import qualified "base-compat-batteries" Control.Monad.Fail.Compat as Fail (fail) | ||||
| import Control.Exception          (IOException, handle, throw) | ||||
| import Control.Monad              (liftM, unless, when) | ||||
| import Control.Monad.Except       (ExceptT, throwError) | ||||
| import Control.Monad.IO.Class     (MonadIO, liftIO) | ||||
| import Control.Monad.State.Strict (StateT, get, modify', evalStateT) | ||||
| import Control.Monad.Trans.Class  (lift) | ||||
| import Data.Char                  (toLower, isDigit, isSpace, ord) | ||||
| import Data.Char                  (toLower, isDigit, isSpace, isAlphaNum, ord) | ||||
| import Data.Bifunctor             (first) | ||||
| import "base-compat-batteries" Data.List.Compat | ||||
| import qualified Data.List.Split as LS (splitOn) | ||||
| import Data.Maybe | ||||
| import Data.Ord | ||||
| import qualified Data.Set as S | ||||
| @ -186,6 +186,9 @@ addAssignmentsFromList fs r = foldl' maybeAddAssignment r journalfieldnames | ||||
| addConditionalBlock :: ConditionalBlock -> CsvRules -> CsvRules | ||||
| addConditionalBlock b r = r{rconditionalblocks=b:rconditionalblocks r} | ||||
| 
 | ||||
| addConditionalBlocks :: [ConditionalBlock] -> CsvRules -> CsvRules | ||||
| addConditionalBlocks bs r = r{rconditionalblocks=bs++rconditionalblocks r} | ||||
| 
 | ||||
| getDirective :: DirectiveName -> CsvRules -> Maybe FieldTemplate | ||||
| getDirective directivename = lookup directivename . rdirectives | ||||
| 
 | ||||
| @ -367,11 +370,14 @@ DIGIT: 0-9 | ||||
| 
 | ||||
| rulesp :: CsvRulesParser CsvRules | ||||
| rulesp = do | ||||
|   _ <- many $ choiceInState | ||||
|   _ <- many $ choice | ||||
|     [blankorcommentlinep                                                  <?> "blank or comment line" | ||||
|     ,(directivep        >>= modify' . addDirective)                       <?> "directive" | ||||
|     ,(fieldnamelistp    >>= modify' . setIndexesAndAssignmentsFromList)   <?> "field name list" | ||||
|     ,(fieldassignmentp  >>= modify' . addAssignment)                      <?> "field assignment" | ||||
|     -- conditionaltablep backtracks because it shares "if" prefix with conditionalblockp and the | ||||
|     -- reverse is there to ensure that conditions are added in the order they listed in the file | ||||
|     ,try (conditionaltablep >>= modify' . addConditionalBlocks . reverse) <?> "conditional table" | ||||
|     ,(conditionalblockp >>= modify' . addConditionalBlock)                <?> "conditional block" | ||||
|     ] | ||||
|   eof | ||||
| @ -504,26 +510,60 @@ fieldvalp = do | ||||
| conditionalblockp :: CsvRulesParser ConditionalBlock | ||||
| conditionalblockp = do | ||||
|   lift $ dbgparse 8 "trying conditionalblockp" | ||||
|   string "if" >> lift (skipMany spacenonewline) >> optional newline | ||||
|   -- "if\nMATCHER" or "if    \nMATCHER" or "if MATCHER" | ||||
|   start <- getOffset | ||||
|   string "if" >> ( (newline >> return Nothing) | ||||
|                   <|> (lift (skipSome spacenonewline) >> optional newline)) | ||||
|   ms <- some matcherp | ||||
|   as <- many (try $ lift (skipSome spacenonewline) >> fieldassignmentp) | ||||
|   as <- catMaybes <$> | ||||
|     many (lift (skipSome spacenonewline) >> | ||||
|           choice [ lift eolof >> return Nothing | ||||
|                  , fmap Just fieldassignmentp | ||||
|                  ]) | ||||
|   when (null as) $ | ||||
|     Fail.fail "start of conditional block found, but no assignment rules afterward\n(assignment rules in a conditional block should be indented)\n" | ||||
|     customFailure $ parseErrorAt start $  "start of conditional block found, but no assignment rules afterward\n(assignment rules in a conditional block should be indented)\n" | ||||
|   return $ CB{cbMatchers=ms, cbAssignments=as} | ||||
|   <?> "conditional block" | ||||
| 
 | ||||
| -- A conditional table: "if" followed by separator, followed by some field names, | ||||
| -- followed by many lines, each of which has: | ||||
| -- one matchers, followed by field assignments (as many as there were fields) | ||||
| conditionaltablep :: CsvRulesParser [ConditionalBlock] | ||||
| conditionaltablep = do | ||||
|   lift $ dbgparse 8 "trying conditionaltablep" | ||||
|   start <- getOffset | ||||
|   string "if"  | ||||
|   sep <- lift $ satisfy (not.isAlphaNum) | ||||
|   fields <- journalfieldnamep `sepBy1` (char sep) | ||||
|   newline | ||||
|   body <- flip manyTill (lift eolof) $ do | ||||
|     off <- getOffset | ||||
|     m <- matcherp' (char sep >> return ()) | ||||
|     vs <- LS.splitOn [sep] <$> lift restofline | ||||
|     if (length vs /= length fields) | ||||
|       then customFailure $ parseErrorAt off $ ((printf "line of conditional table should have %d values, but this one has only %d\n" (length fields) (length vs)) :: String) | ||||
|       else return (m,vs) | ||||
|   when (null body) $ | ||||
|     customFailure $ parseErrorAt start $ "start of conditional table found, but no assignment rules afterward\n" | ||||
|   return $ flip map body $ \(m,vs) -> | ||||
|     CB{cbMatchers=[m], cbAssignments=zip fields vs} | ||||
|   <?> "conditional table" | ||||
| 
 | ||||
| -- A single matcher, on one line. | ||||
| matcherp' :: CsvRulesParser () -> CsvRulesParser Matcher | ||||
| matcherp' end = try (fieldmatcherp end) <|> recordmatcherp end | ||||
| 
 | ||||
| matcherp :: CsvRulesParser Matcher | ||||
| matcherp = try fieldmatcherp <|> recordmatcherp | ||||
| matcherp = matcherp' (lift eolof) | ||||
| 
 | ||||
| -- A single whole-record matcher. | ||||
| -- A pattern on the whole line, not beginning with a csv field reference. | ||||
| recordmatcherp :: CsvRulesParser Matcher | ||||
| recordmatcherp = do | ||||
|   lift $ dbgparse 8 "trying matcherp" | ||||
| recordmatcherp :: CsvRulesParser () -> CsvRulesParser Matcher | ||||
| recordmatcherp end = do | ||||
|   lift $ dbgparse 8 "trying recordmatcherp" | ||||
|   -- pos <- currentPos | ||||
|   -- _  <- optional (matchoperatorp >> lift (skipMany spacenonewline) >> optional newline) | ||||
|   r <- regexp | ||||
|   r <- regexp end | ||||
|   -- when (null ps) $ | ||||
|   --   Fail.fail "start of record matcher found, but no patterns afterward\n(patterns should not be indented)\n" | ||||
|   return $ RecordMatcher r | ||||
| @ -533,8 +573,8 @@ recordmatcherp = do | ||||
| -- (like %date or %1), and a pattern on the rest of the line, | ||||
| -- optionally space-separated. Eg: | ||||
| -- %description chez jacques | ||||
| fieldmatcherp :: CsvRulesParser Matcher | ||||
| fieldmatcherp = do | ||||
| fieldmatcherp :: CsvRulesParser () -> CsvRulesParser Matcher | ||||
| fieldmatcherp end = do | ||||
|   lift $ dbgparse 8 "trying fieldmatcher" | ||||
|   -- An optional fieldname (default: "all") | ||||
|   -- f <- fromMaybe "all" `fmap` (optional $ do | ||||
| @ -545,7 +585,7 @@ fieldmatcherp = do | ||||
|   -- optional operator.. just ~ (case insensitive infix regex) for now | ||||
|   -- _op <- fromMaybe "~" <$> optional matchoperatorp | ||||
|   lift (skipMany spacenonewline) | ||||
|   r <- regexp | ||||
|   r <- regexp end | ||||
|   return $ FieldMatcher f r | ||||
|   <?> "field matcher" | ||||
| 
 | ||||
| @ -557,12 +597,12 @@ csvfieldreferencep = do | ||||
|   return $ '%' : quoteIfNeeded f | ||||
| 
 | ||||
| -- A single regular expression | ||||
| regexp :: CsvRulesParser RegexpPattern | ||||
| regexp = do | ||||
| regexp :: CsvRulesParser () -> CsvRulesParser RegexpPattern | ||||
| regexp end = do | ||||
|   lift $ dbgparse 8 "trying regexp" | ||||
|   -- notFollowedBy matchoperatorp | ||||
|   c <- lift nonspace | ||||
|   cs <- anySingle `manyTill` lift eolof | ||||
|   cs <- anySingle `manyTill` end | ||||
|   return $ strip $ c:cs | ||||
| 
 | ||||
| -- -- A match operator, indicating the type of match to perform. | ||||
|  | ||||
| @ -520,7 +520,7 @@ separator TAB | ||||
| 
 | ||||
| See also: [File Extension](#file-extension). | ||||
| 
 | ||||
| ## `if` | ||||
| ## `if` block | ||||
| 
 | ||||
| ```rules | ||||
| if MATCHER | ||||
| @ -590,6 +590,57 @@ banking thru software | ||||
|  comment  XXX deductible ? check it | ||||
| ``` | ||||
| 
 | ||||
| ## `if` table | ||||
| 
 | ||||
| ```rules | ||||
| if,CSVFIELDNAME1,CSVFIELDNAME2,...,CSVFIELDNAMEn | ||||
| MATCHER1,VALUE11,VALUE12,...,VALUE1n | ||||
| MATCHER2,VALUE21,VALUE22,...,VALUE2n | ||||
| MATCHER3,VALUE31,VALUE32,...,VALUE3n | ||||
| <empty line> | ||||
| ``` | ||||
| 
 | ||||
| Conditional tables ("if tables") are a different syntax to specify | ||||
| field assignments that will be applied only to CSV records which match certain patterns. | ||||
| 
 | ||||
| MATCHER could be either field or record matcher, as described above. When MATCHER matches, | ||||
| values from that row would be assigned to the CSV fields named on the `if` line, in the same order. | ||||
| 
 | ||||
| Therefore `if` table is exactly equivalent to a sequence of of `if` blocks: | ||||
| ```rules | ||||
| if MATCHER1 | ||||
|   CSVFIELDNAME1 VALUE11 | ||||
|   CSVFIELDNAME2 VALUE12 | ||||
|   ... | ||||
|   CSVFIELDNAMEn VALUE1n | ||||
| 
 | ||||
| if MATCHER2 | ||||
|   CSVFIELDNAME1 VALUE21 | ||||
|   CSVFIELDNAME2 VALUE22 | ||||
|   ... | ||||
|   CSVFIELDNAMEn VALUE2n | ||||
| 
 | ||||
| if MATCHER3 | ||||
|   CSVFIELDNAME1 VALUE31 | ||||
|   CSVFIELDNAME2 VALUE32 | ||||
|   ... | ||||
|   CSVFIELDNAMEn VALUE3n | ||||
| ``` | ||||
| 
 | ||||
| Each line starting with MATCHER should contain enough (possibly empty) values for all the listed fields. | ||||
| 
 | ||||
| Rules would be checked and applied in the order they are listed in the table and, like with `if` blocks, later rules (in the same or another table) or `if` blocks could override the effect of any rule. | ||||
| 
 | ||||
| Instead of ',' you can use a variety of other non-alphanumeric characters as a separator. First character after `if` is taken to be the separator for the rest of the table. It is the responsibility of the user to ensure that separator does not occur inside MATCHERs and values - there is no way to escape separator. | ||||
| 
 | ||||
| 
 | ||||
| Example: | ||||
| ```rules | ||||
| if,account2,comment | ||||
| atm transaction fee,expenses:business:banking,deductible? check it | ||||
| %description groceries,expenses:groceries, | ||||
| 2020/01/12.*Plumbing LLC,expenses:house:upkeep,emergency plumbing call-out | ||||
| ``` | ||||
| 
 | ||||
| ## `end` | ||||
| 
 | ||||
|  | ||||
							
								
								
									
										168
									
								
								tests/csv.test
									
									
									
									
									
								
							
							
						
						
									
										168
									
								
								tests/csv.test
									
									
									
									
									
								
							| @ -665,6 +665,174 @@ $  ./csvtest.sh | ||||
| 
 | ||||
| >=0 | ||||
| 
 | ||||
| # 35. tabular rules assigning multiple fields | ||||
| < | ||||
| 10/2009/09,Flubber Co,50 | ||||
| 
 | ||||
| RULES | ||||
| fields date, description, amount | ||||
| date-format %d/%Y/%m | ||||
| currency $ | ||||
| account1 assets:myacct | ||||
| if,account2,comment | ||||
| Flubber,acct,cmt | ||||
| $  ./csvtest.sh | ||||
| 2009-09-10 Flubber Co  ; cmt | ||||
|     assets:myacct             $50 | ||||
|     acct                     $-50 | ||||
| 
 | ||||
| >=0 | ||||
| 
 | ||||
| # 36. tabular rules assigning multiple fields followed by regular rules | ||||
| < | ||||
| 10/2009/09,Flubber Co,50 | ||||
| 10/2009/09,Blubber Co,150 | ||||
| 
 | ||||
| RULES | ||||
| fields date, description, amount | ||||
| date-format %d/%Y/%m | ||||
| currency $ | ||||
| account1 assets:myacct | ||||
| if,account2,comment | ||||
| Flubber,acct,cmt | ||||
| 
 | ||||
| if | ||||
| Blubber | ||||
|   account2   acct2 | ||||
|   comment     cmt2 | ||||
| $  ./csvtest.sh | ||||
| 2009-09-10 Flubber Co  ; cmt | ||||
|     assets:myacct             $50 | ||||
|     acct                     $-50 | ||||
| 
 | ||||
| 2009-09-10 Blubber Co  ; cmt2 | ||||
|     assets:myacct            $150 | ||||
|     acct2                   $-150 | ||||
| 
 | ||||
| >=0 | ||||
| 
 | ||||
| # 37. tabular rules with empty values | ||||
| < | ||||
| 10/2009/09,Flubber Co,50 | ||||
| 10/2009/09,Blubber Co,150 | ||||
| 
 | ||||
| RULES | ||||
| fields date, description, amount | ||||
| date-format %d/%Y/%m | ||||
| currency $ | ||||
| account1 assets:myacct | ||||
| if,account2,comment | ||||
| Flubber,acct, | ||||
| Blubber,acct2, | ||||
| $  ./csvtest.sh | ||||
| 2009-09-10 Flubber Co | ||||
|     assets:myacct             $50 | ||||
|     acct                     $-50 | ||||
| 
 | ||||
| 2009-09-10 Blubber Co | ||||
|     assets:myacct            $150 | ||||
|     acct2                   $-150 | ||||
| 
 | ||||
| >=0 | ||||
| 
 | ||||
| # 38. tabular rules with field matchers and '|' separator | ||||
| < | ||||
| 10/2009/09,Flubber Co,50 | ||||
| 10/2009/09,Blubber Co,150 | ||||
| 
 | ||||
| RULES | ||||
| fields date, description, amount | ||||
| date-format %d/%Y/%m | ||||
| currency $ | ||||
| account1 assets:myacct | ||||
| if|account2|comment | ||||
| %description Flubber|acct| | ||||
| %amount 150|acct2|cmt2 | ||||
| $  ./csvtest.sh | ||||
| 2009-09-10 Flubber Co | ||||
|     assets:myacct             $50 | ||||
|     acct                     $-50 | ||||
| 
 | ||||
| 2009-09-10 Blubber Co  ; cmt2 | ||||
|     assets:myacct            $150 | ||||
|     acct2                   $-150 | ||||
| 
 | ||||
| >=0 | ||||
| 
 | ||||
| # 39. Insfficient number of values in tabular rules error | ||||
| < | ||||
| 10/2009/09,Flubber Co,50 | ||||
| 10/2009/09,Blubber Co,150 | ||||
| 
 | ||||
| RULES | ||||
| fields date, description, amount | ||||
| date-format %d/%Y/%m | ||||
| currency $ | ||||
| account1 assets:myacct | ||||
| if|account2|comment | ||||
| %amount 150|acct2 | ||||
| %description Flubber|acct| | ||||
| $  ./csvtest.sh | ||||
| >2 | ||||
| hledger: user error (input.rules:6:1: | ||||
|   | | ||||
| 6 | %amount 150|acct2 | ||||
|   | ^ | ||||
| line of conditional table should have 2 values, but this one has only 1 | ||||
| 
 | ||||
| ) | ||||
| >=1 | ||||
| 
 | ||||
| # 40. unindented condition block error | ||||
| < | ||||
| 10/2009/09,Flubber Co,50 | ||||
| 
 | ||||
| RULES | ||||
| fields date, description, amount | ||||
| date-format %d/%Y/%m | ||||
| currency $ | ||||
| account1 assets:myacct | ||||
| if Flubber | ||||
| account2 acct | ||||
| comment cmt | ||||
| $  ./csvtest.sh | ||||
| >2 | ||||
| hledger: user error (input.rules:5:1: | ||||
|   | | ||||
| 5 | if Flubber | ||||
|   | ^ | ||||
| start of conditional block found, but no assignment rules afterward | ||||
| (assignment rules in a conditional block should be indented) | ||||
| 
 | ||||
| ) | ||||
| >=1 | ||||
| 
 | ||||
| # 41. Assignment to custom field (#1264) + spaces after the if (#1120) | ||||
| < | ||||
| 10/2009/09,Flubber Co,50 | ||||
| 
 | ||||
| RULES | ||||
| fields date, description, amount | ||||
| date-format %d/%Y/%m | ||||
| currency $ | ||||
| account1 assets:myacct | ||||
| if Flubber | ||||
|   myaccount2 acct | ||||
|   comment cmt | ||||
| 
 | ||||
| 
 | ||||
| account2 %myaccount2 | ||||
| $  ./csvtest.sh | ||||
| >2 | ||||
| hledger: user error (input.rules:6:3: | ||||
|   | | ||||
| 6 |   myaccount2 acct | ||||
|   |   ^^^^^^^^^^^^ | ||||
| unexpected "myaccount2 a" | ||||
| expecting end of input, field assignment, or newline | ||||
| ) | ||||
| >=1 | ||||
| 
 | ||||
| ## .  | ||||
| #< | ||||
| #$  ./csvtest.sh | ||||
|  | ||||
| @ -12,4 +12,6 @@ BEGIN{output=CSV} | ||||
| 
 | ||||
| trap "rm -f t.$$.csv t.$$.csv.rules" EXIT ERR | ||||
| 
 | ||||
| hledger -f csv:t.$$.csv --rules-file t.$$.csv.rules print "$@" | ||||
| # Remove variable file name from error messages | ||||
| :; ( hledger -f csv:t.$$.csv --rules-file t.$$.csv.rules print "$@" ) \ | ||||
|        2> >( sed -re "s/t.*.csv/input/" >&2 ) | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user