From ee4a2a1c1eaeb3127b63d599d19d2678278505bf Mon Sep 17 00:00:00 2001 From: nick Date: Mon, 8 Dec 2008 01:49:31 +0000 Subject: [PATCH] New ledger parser with file inclusion --- Ledger/Parse.hs | 193 ++++++++++++++++++++++++------------------------ Tests.hs | 66 +++++++++-------- Utils.hs | 14 ++-- 3 files changed, 139 insertions(+), 134 deletions(-) diff --git a/Ledger/Parse.hs b/Ledger/Parse.hs index b4b34ed69..82e0b5b0e 100644 --- a/Ledger/Parse.hs +++ b/Ledger/Parse.hs @@ -6,6 +6,8 @@ Parsers for standard ledger and timelog files. module Ledger.Parse where +import Control.Monad +import Control.Monad.Error import Text.ParserCombinators.Parsec import Text.ParserCombinators.Parsec.Char import Text.ParserCombinators.Parsec.Language @@ -20,51 +22,71 @@ import Ledger.Amount import Ledger.Entry import Ledger.Commodity import Ledger.TimeLog +import Ledger.RawLedger import Data.Time.LocalTime import Data.Time.Calendar -- utils -parseLedgerFile :: String -> IO (Either ParseError RawLedger) -parseLedgerFile "-" = fmap (parse ledgerfile "-") $ hGetContents stdin -parseLedgerFile f = parseFromFile ledgerfile f +parseLedgerFile :: FilePath -> ErrorT String IO RawLedger +parseLedgerFile "-" = liftIO (hGetContents stdin) >>= parseLedger "-" +parseLedgerFile f = liftIO (readFile f) >>= parseLedger f printParseError :: (Show a) => a -> IO () printParseError e = do putStr "ledger parse error at "; print e --- set up token parsing, though we're not yet using these much -ledgerLanguageDef = LanguageDef { - commentStart = "" - , commentEnd = "" - , commentLine = ";" - , nestedComments = False - , identStart = letter <|> char '_' - , identLetter = alphaNum <|> oneOf "_':" - , opStart = opLetter emptyDef - , opLetter = oneOf "!#$%&*+./<=>?@\\^|-~" - , reservedOpNames= [] - , reservedNames = [] - , caseSensitive = False - } -lexer = P.makeTokenParser ledgerLanguageDef -whiteSpace = P.whiteSpace lexer -lexeme = P.lexeme lexer ---symbol = P.symbol lexer -natural = P.natural lexer -parens = P.parens lexer -semi = P.semi lexer -identifier = P.identifier lexer -reserved = P.reserved lexer -reservedOp = P.reservedOp lexer +-- Default accounts "nest" hierarchically + +data LedgerFileCtx = Ctx { ctxYear :: !(Maybe Integer) + , ctxCommod :: !(Maybe String) + , ctxAccount :: ![String] + } deriving (Read, Show) + +emptyCtx :: LedgerFileCtx +emptyCtx = Ctx { ctxYear = Nothing, ctxCommod = Nothing, ctxAccount = [] } + +parseLedger :: FilePath -> String -> ErrorT String IO RawLedger +parseLedger inname intxt = case runParser ledgerFile emptyCtx inname intxt of + Right m -> m `ap` (return rawLedgerEmpty) + Left err -> throwError $ show err + +ledgerFile :: GenParser Char LedgerFileCtx (ErrorT String IO (RawLedger -> RawLedger)) +ledgerFile = do entries <- many1 ledgerAnyEntry + eof + return $ liftM (foldr1 (.)) $ sequence entries + where ledgerAnyEntry = choice [ ledgerInclude + , liftM (return . addEntry) ledgerEntry + , liftM (return . addModifierEntry) ledgerModifierEntry + , liftM (return . addPeriodicEntry) ledgerPeriodicEntry + , blankline >> return (return id) + , commentline >> return (return id) + ] + +ledgerInclude :: GenParser Char LedgerFileCtx (ErrorT String IO (RawLedger -> RawLedger)) +ledgerInclude = do string "!include" + many1 spacenonewline + filename <- restofline + outerState <- getState + outerPos <- getPosition + let inIncluded = show outerPos ++ " in included file " ++ show filename ++ ":\n" + return $ do contents <- readFileE outerPos filename + case runParser ledgerFile outerState filename contents of + Right l -> l `catchError` (\err -> throwError $ inIncluded ++ err) + Left perr -> throwError $ inIncluded ++ show perr + where readFileE outerPos filename = ErrorT $ do (liftM Right $ readFile filename) `catch` leftError + where leftError err = return $ Left $ currentPos ++ whileReading ++ show err + currentPos = show outerPos + whileReading = " reading " ++ show filename ++ ":\n" + + +--ledgerEntry = return $ throwError "unimplemented" -- parsers -- | Parse a RawLedger from either a ledger file or a timelog file. -- It tries first the timelog parser then the ledger parser; this means -- parse errors for ledgers are useful while those for timelogs are not. -ledgerfile :: Parser RawLedger -ledgerfile = try ledgerfromtimelog <|> ledger {-| Parse a ledger file. Here is the ledger grammar from the ledger 2.5 manual: @@ -166,38 +188,18 @@ i, o, b, h See "Tests" for sample data. -} -ledger :: Parser RawLedger -ledger = do - -- we expect these to come first, unlike ledger - modifier_entries <- many ledgermodifierentry - periodic_entries <- many ledgerperiodicentry - entries <- (many $ try ledgerentry) "entry" - final_comment_lines <- ledgernondatalines - eof - return $ RawLedger modifier_entries periodic_entries entries (unlines final_comment_lines) +blankline :: GenParser Char st String +blankline = (do { s <- many spacenonewline; newline; return s }) "blank line" -ledgernondatalines :: Parser [String] -ledgernondatalines = many (try ledgerdirective <|> -- treat as comments - try commentline <|> - blankline) - -ledgerdirective :: Parser String -ledgerdirective = char '!' >> restofline "directive" - -blankline :: Parser String -blankline = - do {s <- many1 spacenonewline; newline; return s} <|> - do {newline; return ""} "blank line" - -commentline :: Parser String +commentline :: GenParser Char st String commentline = do many spacenonewline char ';' "comment line" l <- restofline return $ ";" ++ l -ledgercomment :: Parser String +ledgercomment :: GenParser Char st String ledgercomment = try (do char ';' @@ -206,25 +208,24 @@ ledgercomment = ) <|> return "" "comment" -ledgermodifierentry :: Parser ModifierEntry -ledgermodifierentry = do - char '=' "entry" +ledgerModifierEntry :: GenParser Char LedgerFileCtx ModifierEntry +ledgerModifierEntry = do + char '=' "modifier entry" many spacenonewline valueexpr <- restofline transactions <- ledgertransactions - return (ModifierEntry valueexpr transactions) + return $ ModifierEntry valueexpr transactions -ledgerperiodicentry :: Parser PeriodicEntry -ledgerperiodicentry = do +ledgerPeriodicEntry :: GenParser Char LedgerFileCtx PeriodicEntry +ledgerPeriodicEntry = do char '~' "entry" many spacenonewline periodexpr <- restofline transactions <- ledgertransactions - return (PeriodicEntry periodexpr transactions) + return $ PeriodicEntry periodexpr transactions -ledgerentry :: Parser Entry -ledgerentry = do - preceding <- ledgernondatalines +ledgerEntry :: GenParser Char LedgerFileCtx Entry +ledgerEntry = do date <- ledgerdate "entry" status <- ledgerstatus code <- ledgercode @@ -235,9 +236,9 @@ ledgerentry = do comment <- ledgercomment restofline transactions <- ledgertransactions - return $ balanceEntry $ Entry date status code description comment transactions (unlines preceding) + return $ balanceEntry $ Entry date status code description comment transactions "" -ledgerdate :: Parser Day +ledgerdate :: GenParser Char st Day ledgerdate = do y <- many1 digit char '/' @@ -247,7 +248,7 @@ ledgerdate = do many spacenonewline return (fromGregorian (read y) (read m) (read d)) -ledgerdatetime :: Parser UTCTime +ledgerdatetime :: GenParser Char st UTCTime ledgerdatetime = do day <- ledgerdate h <- many1 digit @@ -260,20 +261,20 @@ ledgerdatetime = do return $ mkUTCTime day (TimeOfDay (read h) (read m) (maybe 0 (fromIntegral.read) s)) -ledgerstatus :: Parser Bool +ledgerstatus :: GenParser Char st Bool ledgerstatus = try (do { char '*'; many1 spacenonewline; return True } ) <|> return False -ledgercode :: Parser String +ledgercode :: GenParser Char st String ledgercode = try (do { char '('; code <- anyChar `manyTill` char ')'; many1 spacenonewline; return code } ) <|> return "" -ledgertransactions :: Parser [RawTransaction] -ledgertransactions = - ((try virtualtransaction <|> try balancedvirtualtransaction <|> ledgertransaction) "transaction") - `manyTill` (do {newline "blank line"; return ()} <|> eof) +ledgertransactions :: GenParser Char st [RawTransaction] +ledgertransactions = many $ try ledgertransaction -ledgertransaction :: Parser RawTransaction -ledgertransaction = do - many1 spacenonewline +ledgertransaction :: GenParser Char st RawTransaction +ledgertransaction = many1 spacenonewline >> choice [ normaltransaction, virtualtransaction, balancedvirtualtransaction ] + +normaltransaction :: GenParser Char st RawTransaction +normaltransaction = do account <- ledgeraccountname amount <- transactionamount many spacenonewline @@ -281,9 +282,8 @@ ledgertransaction = do restofline return (RawTransaction account amount comment RegularTransaction) -virtualtransaction :: Parser RawTransaction +virtualtransaction :: GenParser Char st RawTransaction virtualtransaction = do - many1 spacenonewline char '(' account <- ledgeraccountname char ')' @@ -293,9 +293,8 @@ virtualtransaction = do restofline return (RawTransaction account amount comment VirtualTransaction) -balancedvirtualtransaction :: Parser RawTransaction +balancedvirtualtransaction :: GenParser Char st RawTransaction balancedvirtualtransaction = do - many1 spacenonewline char '[' account <- ledgeraccountname char ']' @@ -306,7 +305,7 @@ balancedvirtualtransaction = do return (RawTransaction account amount comment BalancedVirtualTransaction) -- | account names may have single spaces inside them, and are terminated by two or more spaces -ledgeraccountname :: Parser String +ledgeraccountname :: GenParser Char st String ledgeraccountname = do accountname <- many1 (accountnamechar <|> singlespace) return $ striptrailingspace accountname @@ -318,7 +317,7 @@ ledgeraccountname = do accountnamechar = notFollowedBy (oneOf "()[]") >> nonspace "account name character (non-bracket, non-parenthesis, non-whitespace)" -transactionamount :: Parser MixedAmount +transactionamount :: GenParser Char st MixedAmount transactionamount = try (do many1 spacenonewline @@ -328,7 +327,7 @@ transactionamount = someamount = try leftsymbolamount <|> try rightsymbolamount <|> nosymbolamount -leftsymbolamount :: Parser MixedAmount +leftsymbolamount :: GenParser Char st MixedAmount leftsymbolamount = do sym <- commoditysymbol sp <- many spacenonewline @@ -338,7 +337,7 @@ leftsymbolamount = do return $ Mixed [Amount c q pri] "left-symbol amount" -rightsymbolamount :: Parser MixedAmount +rightsymbolamount :: GenParser Char st MixedAmount rightsymbolamount = do (q,p,comma) <- amountquantity sp <- many spacenonewline @@ -348,7 +347,7 @@ rightsymbolamount = do return $ Mixed [Amount c q pri] "right-symbol amount" -nosymbolamount :: Parser MixedAmount +nosymbolamount :: GenParser Char st MixedAmount nosymbolamount = do (q,p,comma) <- amountquantity pri <- priceamount @@ -356,10 +355,10 @@ nosymbolamount = do return $ Mixed [Amount c q pri] "no-symbol amount" -commoditysymbol :: Parser String +commoditysymbol :: GenParser Char st String commoditysymbol = many1 (noneOf "-.0123456789;\n ") "commodity symbol" -priceamount :: Parser (Maybe MixedAmount) +priceamount :: GenParser Char st (Maybe MixedAmount) priceamount = try (do many spacenonewline @@ -374,7 +373,7 @@ priceamount = -- | parse a ledger-style numeric quantity and also return the number of -- digits to the right of the decimal point and whether thousands are -- separated by comma. -amountquantity :: Parser (Double, Int, Bool) +amountquantity :: GenParser Char st (Double, Int, Bool) amountquantity = do sign <- optionMaybe $ string "-" (intwithcommas,frac) <- numberparts @@ -392,10 +391,10 @@ amountquantity = do -- | parse the two strings of digits before and after a possible decimal -- point. The integer part may contain commas, or either part may be -- empty, or there may be no point. -numberparts :: Parser (String,String) +numberparts :: GenParser Char st (String,String) numberparts = numberpartsstartingwithdigit <|> numberpartsstartingwithpoint -numberpartsstartingwithdigit :: Parser (String,String) +numberpartsstartingwithdigit :: GenParser Char st (String,String) numberpartsstartingwithdigit = do let digitorcomma = digit <|> char ',' first <- digit @@ -403,7 +402,7 @@ numberpartsstartingwithdigit = do frac <- try (do {char '.'; many digit >>= return}) <|> return "" return (first:rest,frac) -numberpartsstartingwithpoint :: Parser (String,String) +numberpartsstartingwithpoint :: GenParser Char st (String,String) numberpartsstartingwithpoint = do char '.' frac <- many1 digit @@ -446,13 +445,13 @@ i 2007/03/10 12:26:00 hledger o 2007/03/10 17:26:02 -} -timelog :: Parser TimeLog +timelog :: GenParser Char st TimeLog timelog = do entries <- many timelogentry "timelog entry" eof return $ TimeLog entries -timelogentry :: Parser TimeLogEntry +timelogentry :: GenParser Char st TimeLogEntry timelogentry = do many (commentline <|> blankline) code <- oneOf "bhioO" @@ -461,17 +460,17 @@ timelogentry = do comment <- restofline return $ TimeLogEntry code datetime comment -ledgerfromtimelog :: Parser RawLedger -ledgerfromtimelog = do - tl <- timelog - return $ ledgerFromTimeLog tl +--ledgerfromtimelog :: GenParser Char st RawLedger +--ledgerfromtimelog = do +-- tl <- timelog +-- return $ ledgerFromTimeLog tl -- misc parsing -- | Parse a --display expression which is a simple date predicate, like -- "d>[DATE]" or "d<=[DATE]", and return a transaction-matching predicate. -datedisplayexpr :: Parser (Transaction -> Bool) +datedisplayexpr :: GenParser Char st (Transaction -> Bool) datedisplayexpr = do char 'd' op <- compareop diff --git a/Tests.hs b/Tests.hs index dace5e39d..e707ae324 100644 --- a/Tests.hs +++ b/Tests.hs @@ -55,10 +55,10 @@ misc_tests = TestList [ assertequal (Amount (comm "$") 0 Nothing) (sum [a1,a2,a3,-a3]) , "ledgertransaction" ~: do - assertparseequal rawtransaction1 (parsewith ledgertransaction rawtransaction1_str) + assertparseequal rawtransaction1 (parseWithCtx ledgertransaction rawtransaction1_str) , "ledgerentry" ~: do - assertparseequal entry1 (parsewith ledgerentry entry1_str) + assertparseequal entry1 (parseWithCtx ledgerEntry entry1_str) , "balanceEntry" ~: do assertequal @@ -87,15 +87,15 @@ misc_tests = TestList [ assertequal 15 (length $ Map.keys $ accountmap $ cacheLedger [] rawledger7) , "transactionamount" ~: do - assertparseequal (Mixed [dollars 47.18]) (parsewith transactionamount " $47.18") - assertparseequal (Mixed [Amount (Commodity {symbol="$",side=L,spaced=False,comma=False,precision=0}) 1 Nothing]) (parsewith transactionamount " $1.") + assertparseequal (Mixed [dollars 47.18]) (parseWithCtx transactionamount " $47.18") + assertparseequal (Mixed [Amount (Commodity {symbol="$",side=L,spaced=False,comma=False,precision=0}) 1 Nothing]) (parseWithCtx transactionamount " $1.") , "canonicaliseAmounts" ~: do -- all amounts use the greatest precision assertequal [2,2] (rawLedgerPrecisions $ canonicaliseAmounts False $ rawLedgerWithAmounts ["1","2.00"]) , "timeLog" ~: do - assertparseequal timelog1 (parsewith timelog timelog1_str) + assertparseequal timelog1 (parseWithCtx timelog timelog1_str) , "smart dates" ~: do let todaysdate = parsedate "2008/11/26" -- wednesday @@ -238,7 +238,7 @@ balancereportacctnames_tests = TestList ,"balancereportacctnames8" ~: ("-s",["-e"]) `gives` [] ] where gives (opt,pats) e = do - let l = sampleledger + l <- sampleledger let t = pruneZeroBalanceLeaves $ ledgerAccountTree 999 l assertequal e (balancereportacctnames l (opt=="-s") pats t) @@ -375,15 +375,15 @@ balancecommand_tests = TestList [ "") , "balance report with cost basis" ~: do - let l = cacheLedger [] $ - filterRawLedger (DateSpan Nothing Nothing) [] False False $ - canonicaliseAmounts True $ -- enable cost basis adjustment - rawledgerfromstring + rawl <- rawledgerfromstring ("" ++ "2008/1/1 test \n" ++ " a:b 10h @ $50\n" ++ " c:d \n" ++ "\n") + let l = cacheLedger [] $ + filterRawLedger (DateSpan Nothing Nothing) [] False False $ + canonicaliseAmounts True rawl -- enable cost basis adjustment assertequal (" $500 a\n" ++ " $-500 c\n" ++ @@ -392,14 +392,14 @@ balancecommand_tests = TestList [ (showBalanceReport [] [] l) ] where gives (opts,args) e = do - let l = sampleledgerwithopts [] args + l <- sampleledgerwithopts [] args assertequal e (showBalanceReport opts args l) printcommand_tests = TestList [ "print with account patterns" ~: do let args = ["expenses"] - let l = sampleledgerwithopts [] args + l <- sampleledgerwithopts [] args assertequal ( "2008/06/03 * eat & shop\n" ++ " expenses:food $1\n" ++ @@ -412,6 +412,7 @@ printcommand_tests = TestList [ registercommand_tests = TestList [ "register report" ~: do + l <- sampleledger assertequal ( "2008/01/01 income assets:checking $1 $1\n" ++ " income:salary $-1 0\n" ++ @@ -425,17 +426,21 @@ registercommand_tests = TestList [ "2008/12/31 pay off liabilities:debts $1 $1\n" ++ " assets:checking $-1 0\n" ++ "") - $ showRegisterReport [] [] sampleledger + $ showRegisterReport [] [] l , "register report with account pattern" ~: do + l <- sampleledger assertequal ( "2008/06/03 eat & shop assets:cash $-2 $-2\n" ++ "") - $ showRegisterReport [] ["cash"] sampleledger + $ showRegisterReport [] ["cash"] l , "register report with display expression" ~: do + l <- sampleledger + let expr `displayexprgives` dates = assertequal dates (datesfromregister r) + where r = showRegisterReport [Display expr] [] l "d<[2008/6/2]" `displayexprgives` ["2008/01/01","2008/06/01"] "d<=[2008/6/2]" `displayexprgives` ["2008/01/01","2008/06/01","2008/06/02"] "d=[2008/6/2]" `displayexprgives` ["2008/06/02"] @@ -444,12 +449,14 @@ registercommand_tests = TestList [ , "register report with period expression" ~: do + l <- sampleledger + let expr `displayexprgives` dates = assertequal dates (datesfromregister r) + where r = showRegisterReport [Display expr] [] l "" `periodexprgives` ["2008/01/01","2008/06/01","2008/06/02","2008/06/03","2008/12/31"] "2008" `periodexprgives` ["2008/01/01","2008/06/01","2008/06/02","2008/06/03","2008/12/31"] "2007" `periodexprgives` [] "june" `periodexprgives` ["2008/06/01","2008/06/02","2008/06/03"] "monthly" `periodexprgives` ["2008/01/01","2008/06/01","2008/12/01"] - assertequal ( "2008/01/01 - 2008/12/31 assets:cash $-2 $-2\n" ++ " assets:saving $1 $-1\n" ++ @@ -459,25 +466,18 @@ registercommand_tests = TestList [ " income:salary $-1 $-1\n" ++ " liabilities:debts $1 0\n" ++ "") - (showRegisterReport [Period "yearly"] [] sampleledger) - + (showRegisterReport [Period "yearly"] [] l) assertequal ["2008/01/01","2008/04/01","2008/10/01"] - (datesfromregister $ showRegisterReport [Period "quarterly"] [] sampleledger) + (datesfromregister $ showRegisterReport [Period "quarterly"] [] l) assertequal ["2008/01/01","2008/04/01","2008/07/01","2008/10/01"] - (datesfromregister $ showRegisterReport [Period "quarterly",Empty] [] sampleledger) + (datesfromregister $ showRegisterReport [Period "quarterly",Empty] [] l) ] - where - expr `displayexprgives` dates = - assertequal dates (datesfromregister r) - where - r = showRegisterReport [Display expr] [] sampleledger - expr `periodexprgives` dates = - assertequal dates (datesfromregister r) - where - r = showRegisterReport [Period expr] [] l - l = sampleledgerwithopts [Period expr] [] - datesfromregister = filter (not . null) . map (strip . take 10) . lines + where datesfromregister = filter (not . null) . map (strip . take 10) . lines + expr `periodexprgives` dates = do lopts <- sampleledgerwithopts [Period expr] [] + let r = showRegisterReport [Period expr] [] lopts + assertequal dates (datesfromregister r) + ------------------------------------------------------------------------------ @@ -486,7 +486,7 @@ registercommand_tests = TestList [ refdate = parsedate "2008/11/26" sampleledger = ledgerfromstringwithopts [] [] refdate sample_ledger_str sampleledgerwithopts opts args = ledgerfromstringwithopts opts args refdate sample_ledger_str -sampleledgerwithoptsanddate opts args date = ledgerfromstringwithopts opts args date sample_ledger_str +--sampleledgerwithoptsanddate opts args date = unsafePerformIO $ ledgerfromstringwithopts opts args date sample_ledger_str sample_ledger_str = ( "; A sample ledger file.\n" ++ @@ -816,6 +816,7 @@ rawledger7 = RawLedger epreceding_comment_lines="" } ] + [] "" ledger7 = cacheLedger [] rawledger7 @@ -878,6 +879,7 @@ rawLedgerWithAmounts as = [] [] [nullentry{edescription=a,etransactions=[nullrawtxn{tamount=parse a}]} | a <- as] + [] "" - where parse = fromparse . parsewith transactionamount . (" "++) + where parse = fromparse . parseWithCtx transactionamount . (" "++) diff --git a/Utils.hs b/Utils.hs index 600169f48..8d71a2eea 100644 --- a/Utils.hs +++ b/Utils.hs @@ -6,6 +6,7 @@ Utilities for top-level modules and/or ghci. See also "Ledger.Utils". module Utils where +import Control.Monad.Error import qualified Data.Map as Map (lookup) import Text.ParserCombinators.Parsec import System.IO @@ -26,18 +27,18 @@ prepareLedger opts args refdate rl = cb = CostBasis `elem` opts -- | Get a RawLedger from the given string, or raise an error. -rawledgerfromstring :: String -> RawLedger -rawledgerfromstring = fromparse . parsewith ledgerfile +rawledgerfromstring :: String -> IO RawLedger +rawledgerfromstring = liftM (either error id) . runErrorT . parseLedger "(string)" -- | Get a Ledger from the given string and options, or raise an error. -ledgerfromstringwithopts :: [Opt] -> [String] -> Day -> String -> Ledger +ledgerfromstringwithopts :: [Opt] -> [String] -> Day -> String -> IO Ledger ledgerfromstringwithopts opts args refdate s = - prepareLedger opts args refdate $ rawledgerfromstring s + liftM (prepareLedger opts args refdate) $ rawledgerfromstring s -- | Get a Ledger from the given file path and options, or raise an error. ledgerfromfilewithopts :: [Opt] -> [String] -> FilePath -> IO Ledger ledgerfromfilewithopts opts args f = do - rl <- readFile f >>= return . rawledgerfromstring + rl <- readFile f >>= rawledgerfromstring refdate <- today return $ prepareLedger opts args refdate rl @@ -45,3 +46,6 @@ ledgerfromfilewithopts opts args f = do -- Assumes no options. myledger :: IO Ledger myledger = ledgerFilePathFromOpts [] >>= ledgerfromfilewithopts [] [] + +parseWithCtx :: GenParser Char LedgerFileCtx a -> String -> Either ParseError a +parseWithCtx p ts = runParser p emptyCtx "" ts \ No newline at end of file