diff --git a/hledger-lib/Hledger/Read.hs b/hledger-lib/Hledger/Read.hs index 2a7a382f8..4a2314f96 100644 --- a/hledger-lib/Hledger/Read.hs +++ b/hledger-lib/Hledger/Read.hs @@ -8,7 +8,8 @@ to import modules below this one. -} -module Hledger.Read ( +module Hledger.Read + ( readFormatNames, -- * Journal reading API defaultJournalPath, @@ -33,22 +34,210 @@ module Hledger.Read ( tests_Hledger_Read, ) where +import qualified Control.Exception as C import Control.Monad.Except import Data.List import Data.Maybe +import System.Directory (doesFileExist, getHomeDirectory) +import System.Environment (getEnv) +import System.Exit (exitFailure) +import System.FilePath (()) +import System.IO (IOMode(..), openFile, stdin, stderr, hSetNewlineMode, universalNewlineMode) import Test.HUnit +import Text.Printf -import Hledger.Data.Types +import Hledger.Data.Dates (getCurrentDay) import Hledger.Data.Journal (nullctx) -import Hledger.Read.Util +import Hledger.Data.Types import Hledger.Read.JournalReader as JournalReader -import Hledger.Read.TimeclockReader as TimeclockReader import Hledger.Read.TimedotReader as TimedotReader +import Hledger.Read.TimeclockReader as TimeclockReader import Hledger.Read.CsvReader as CsvReader import Hledger.Utils import Prelude hiding (getContents, writeFile) +import Hledger.Utils.UTF8IOCompat (hGetContents, writeFile) +-- The available data file readers, each one handling a particular data +-- format. The first is also used as the default for unknown formats. +readers :: [Reader] +readers = [ + JournalReader.reader + ,TimeclockReader.reader + ,TimedotReader.reader + ,CsvReader.reader + ] + +readFormatNames :: [StorageFormat] +readFormatNames = map rFormat readers + +journalEnvVar = "LEDGER_FILE" +journalEnvVar2 = "LEDGER" +journalDefaultFilename = ".hledger.journal" + +-- | Which readers are worth trying for this (possibly unspecified) format, filepath, and data ? +readersFor :: (Maybe StorageFormat, Maybe FilePath, String) -> [Reader] +readersFor (format,path,s) = + dbg1 ("possible readers for "++show (format,path,elideRight 30 s)) $ + case format of + Just f -> case readerForStorageFormat f of Just r -> [r] + Nothing -> [] + Nothing -> case path of Nothing -> readers + Just p -> case readersForPathAndData (p,s) of [] -> readers + rs -> rs + +-- | Find the (first) reader which can handle the given format, if any. +readerForStorageFormat :: StorageFormat -> Maybe Reader +readerForStorageFormat s | null rs = Nothing + | otherwise = Just $ head rs + where + rs = filter ((s==).rFormat) readers :: [Reader] + +-- | Find the readers which think they can handle the given file path and data, if any. +readersForPathAndData :: (FilePath,String) -> [Reader] +readersForPathAndData (f,s) = filter (\r -> (rDetector r) f s) readers + +-- try each reader in turn, returning the error of the first if all fail +tryReaders :: [Reader] -> Maybe FilePath -> Bool -> Maybe FilePath -> String -> IO (Either String Journal) +tryReaders readers mrulesfile assrt path s = firstSuccessOrBestError [] readers + where + firstSuccessOrBestError :: [String] -> [Reader] -> IO (Either String Journal) + firstSuccessOrBestError [] [] = return $ Left "no readers found" + firstSuccessOrBestError errs (r:rs) = do + dbg1IO "trying reader" (rFormat r) + result <- (runExceptT . (rParser r) mrulesfile assrt path') s + dbg1IO "reader result" $ either id show result + case result of Right j -> return $ Right j -- success! + Left e -> firstSuccessOrBestError (errs++[e]) rs -- keep trying + firstSuccessOrBestError (e:_) [] = return $ Left e -- none left, return first error + path' = fromMaybe "(string)" path + + +-- | Read a journal from this string, trying whatever readers seem appropriate: +-- +-- - if a format is specified, try that reader only +-- +-- - or if one or more readers recognises the file path and data, try those +-- +-- - otherwise, try them all. +-- +-- A CSV conversion rules file may also be specified for use by the CSV reader. +-- Also there is a flag specifying whether to check or ignore balance assertions in the journal. +readJournal :: Maybe StorageFormat -> Maybe FilePath -> Bool -> Maybe FilePath -> String -> IO (Either String Journal) +readJournal mformat mrulesfile assrt path s = tryReaders (readersFor (mformat, path, s)) mrulesfile assrt path s + +-- | Read a Journal from this file (or stdin if the filename is -) or give +-- an error message, using the specified data format or trying all known +-- formats. A CSV conversion rules file may be specified for better +-- conversion of that format. Also there is a flag specifying whether +-- to check or ignore balance assertions in the journal. +readJournalFile :: Maybe StorageFormat -> Maybe FilePath -> Bool -> FilePath -> IO (Either String Journal) +readJournalFile format rulesfile assrt f = readJournalFiles format rulesfile assrt [f] + +readJournalFiles :: Maybe StorageFormat -> Maybe FilePath -> Bool -> [FilePath] -> IO (Either String Journal) +readJournalFiles format rulesfile assrt fs = do + contents <- fmap concat $ mapM readFileAnyNewline fs + readJournal format rulesfile assrt (listToMaybe fs) contents + where + readFileAnyNewline f = do + requireJournalFileExists f + h <- fileHandle f + hSetNewlineMode h universalNewlineMode + hGetContents h + fileHandle "-" = return stdin + fileHandle f = openFile f ReadMode + +-- | If the specified journal file does not exist, give a helpful error and quit. +requireJournalFileExists :: FilePath -> IO () +requireJournalFileExists "-" = return () +requireJournalFileExists f = do + exists <- doesFileExist f + when (not exists) $ do + hPrintf stderr "The hledger journal file \"%s\" was not found.\n" f + hPrintf stderr "Please create it first, eg with \"hledger add\" or a text editor.\n" + hPrintf stderr "Or, specify an existing journal file with -f or LEDGER_FILE.\n" + exitFailure + +-- | Ensure there is a journal file at the given path, creating an empty one if needed. +ensureJournalFileExists :: FilePath -> IO () +ensureJournalFileExists f = do + exists <- doesFileExist f + when (not exists) $ do + hPrintf stderr "Creating hledger journal file %s.\n" f + -- note Hledger.Utils.UTF8.* do no line ending conversion on windows, + -- we currently require unix line endings on all platforms. + newJournalContent >>= writeFile f + +-- | Give the content for a new auto-created journal file. +newJournalContent :: IO String +newJournalContent = do + d <- getCurrentDay + return $ printf "; journal created %s by hledger\n" (show d) + +-- | Get the default journal file path specified by the environment. +-- Like ledger, we look first for the LEDGER_FILE environment +-- variable, and if that does not exist, for the legacy LEDGER +-- environment variable. If neither is set, or the value is blank, +-- return the hard-coded default, which is @.hledger.journal@ in the +-- users's home directory (or in the current directory, if we cannot +-- determine a home directory). +defaultJournalPath :: IO String +defaultJournalPath = do + s <- envJournalPath + if null s then defaultJournalPath else return s + where + envJournalPath = + getEnv journalEnvVar + `C.catch` (\(_::C.IOException) -> getEnv journalEnvVar2 + `C.catch` (\(_::C.IOException) -> return "")) + defaultJournalPath = do + home <- getHomeDirectory `C.catch` (\(_::C.IOException) -> return "") + return $ home journalDefaultFilename + +-- | Read the default journal file specified by the environment, or raise an error. +defaultJournal :: IO Journal +defaultJournal = defaultJournalPath >>= readJournalFile Nothing Nothing True >>= either error' return + +-- | Read a journal from the given string, trying all known formats, or simply throw an error. +readJournal' :: String -> IO Journal +readJournal' s = readJournal Nothing Nothing True Nothing s >>= either error' return + +tests_readJournal' = [ + "readJournal' parses sample journal" ~: do + _ <- samplejournal + assertBool "" True + ] + +-- tests + +samplejournal = readJournal' $ unlines + ["2008/01/01 income" + ," assets:bank:checking $1" + ," income:salary" + ,"" + ,"comment" + ,"multi line comment here" + ,"for testing purposes" + ,"end comment" + ,"" + ,"2008/06/01 gift" + ," assets:bank:checking $1" + ," income:gifts" + ,"" + ,"2008/06/02 save" + ," assets:bank:saving $1" + ," assets:bank:checking" + ,"" + ,"2008/06/03 * eat & shop" + ," expenses:food $1" + ," expenses:supplies $1" + ," assets:cash" + ,"" + ,"2008/12/31 * pay off" + ," liabilities:debts $1" + ," assets:bank:checking" + ] + tests_Hledger_Read = TestList $ tests_readJournal' ++ [ diff --git a/hledger-lib/Hledger/Read/Common.hs b/hledger-lib/Hledger/Read/Common.hs new file mode 100644 index 000000000..7a465a7f7 --- /dev/null +++ b/hledger-lib/Hledger/Read/Common.hs @@ -0,0 +1,867 @@ +--- * doc +-- Lines beginning "--- *" are collapsible orgstruct nodes. Emacs users, +-- (add-hook 'haskell-mode-hook +-- (lambda () (set-variable 'orgstruct-heading-prefix-regexp "--- " t)) +-- 'orgstruct-mode) +-- and press TAB on nodes to expand/collapse. + +{-| + +Some common parsers and parsing helpers used by several readers. + +-} + +--- * module +{-# LANGUAGE CPP, RecordWildCards, NamedFieldPuns, NoMonoLocalBinds, ScopedTypeVariables, FlexibleContexts, TupleSections #-} + +module Hledger.Read.Common +where +--- * imports +import Prelude () +import Prelude.Compat hiding (readFile) +import Control.Monad.Compat +import Control.Monad.Except (ExceptT(..), liftIO, runExceptT, throwError) --, catchError) +import Data.Char (isNumber) +import Data.Functor.Identity +import Data.List.Compat +import Data.List.Split (wordsBy) +import Data.Maybe +import Data.Time.Calendar +import Data.Time.LocalTime +import Safe +import System.Time (getClockTime) +import Text.Parsec hiding (parse) + +import Hledger.Data +import Hledger.Utils + + +--- * parsing utils + +-- | A parser of strings with generic user state, monad and return type. +type StringParser u m a = ParsecT String u m a + +-- | A string parser with journal-parsing state. +type JournalParser m a = StringParser JournalContext m a + +-- | A journal parser that runs in IO and can throw an error mid-parse. +type ErroringJournalParser a = JournalParser (ExceptT String IO) a + +-- | Run a string parser with no state in the identity monad. +runStringParser, rsp :: StringParser () Identity a -> String -> Either ParseError a +runStringParser p s = runIdentity $ runParserT p () "" s +rsp = runStringParser + +-- | Run a journal parser with a null journal-parsing state. +runJournalParser, rjp :: Monad m => JournalParser m a -> String -> m (Either ParseError a) +runJournalParser p s = runParserT p nullctx "" s +rjp = runJournalParser + +-- | Run an error-raising journal parser with a null journal-parsing state. +runErroringJournalParser, rejp :: ErroringJournalParser a -> String -> IO (Either String a) +runErroringJournalParser p s = runExceptT $ runJournalParser p s >>= either (throwError.show) return +rejp = runErroringJournalParser + +genericSourcePos :: SourcePos -> GenericSourcePos +genericSourcePos p = GenericSourcePos (sourceName p) (sourceLine p) (sourceColumn p) + +-- | Flatten a list of JournalUpdate's (journal-transforming +-- monadic actions which can do IO or raise an exception) into a +-- single equivalent action. +combineJournalUpdates :: [JournalUpdate] -> JournalUpdate +combineJournalUpdates us = foldl' (flip (.)) id <$> sequence us +-- XXX may be contributing to excessive stack use + +-- cf http://neilmitchell.blogspot.co.uk/2015/09/detecting-space-leaks.html +-- $ ./devprof +RTS -K576K -xc +-- Exception (reporting due to +RTS -xc): (THUNK_STATIC), stack trace: +-- Hledger.Read.JournalReader.combineJournalUpdates.\, +-- called from Hledger.Read.JournalReader.combineJournalUpdates, +-- called from Hledger.Read.JournalReader.fixedlotprice, +-- called from Hledger.Read.JournalReader.partialbalanceassertion, +-- called from Hledger.Read.JournalReader.getDefaultCommodityAndStyle, +-- called from Hledger.Read.JournalReader.priceamount, +-- called from Hledger.Read.JournalReader.nosymbolamount, +-- called from Hledger.Read.JournalReader.numberp, +-- called from Hledger.Read.JournalReader.rightsymbolamount, +-- called from Hledger.Read.JournalReader.simplecommoditysymbol, +-- called from Hledger.Read.JournalReader.quotedcommoditysymbol, +-- called from Hledger.Read.JournalReader.commoditysymbol, +-- called from Hledger.Read.JournalReader.signp, +-- called from Hledger.Read.JournalReader.leftsymbolamount, +-- called from Hledger.Read.JournalReader.amountp, +-- called from Hledger.Read.JournalReader.spaceandamountormissing, +-- called from Hledger.Read.JournalReader.accountnamep.singlespace, +-- called from Hledger.Utils.Parse.nonspace, +-- called from Hledger.Read.JournalReader.accountnamep, +-- called from Hledger.Read.JournalReader.getAccountAliases, +-- called from Hledger.Read.JournalReader.getParentAccount, +-- called from Hledger.Read.JournalReader.modifiedaccountnamep, +-- called from Hledger.Read.JournalReader.postingp, +-- called from Hledger.Read.JournalReader.postings, +-- called from Hledger.Read.JournalReader.commentStartingWith, +-- called from Hledger.Read.JournalReader.semicoloncomment, +-- called from Hledger.Read.JournalReader.followingcommentp, +-- called from Hledger.Read.JournalReader.descriptionp, +-- called from Hledger.Read.JournalReader.codep, +-- called from Hledger.Read.JournalReader.statusp, +-- called from Hledger.Utils.Parse.spacenonewline, +-- called from Hledger.Read.JournalReader.secondarydatep, +-- called from Hledger.Data.Dates.datesepchar, +-- called from Hledger.Read.JournalReader.datep, +-- called from Hledger.Read.JournalReader.transaction, +-- called from Hledger.Utils.Parse.choice', +-- called from Hledger.Read.JournalReader.directive, +-- called from Hledger.Read.JournalReader.emptyorcommentlinep, +-- called from Hledger.Read.JournalReader.multilinecommentp, +-- called from Hledger.Read.JournalReader.journal.journalItem, +-- called from Hledger.Read.JournalReader.journal, +-- called from Hledger.Read.JournalReader.parseJournalWith, +-- called from Hledger.Read.readJournal.tryReaders.firstSuccessOrBestError, +-- called from Hledger.Read.readJournal.tryReaders, +-- called from Hledger.Read.readJournal, +-- called from Main.main, +-- called from Main.CAF +-- Stack space overflow: current size 33568 bytes. + +-- | Given a JournalUpdate-generating parsec parser, file path and data string, +-- parse and post-process a Journal so that it's ready to use, or give an error. +parseAndFinaliseJournal :: ErroringJournalParser (JournalUpdate,JournalContext) -> Bool -> FilePath -> String -> ExceptT String IO Journal +parseAndFinaliseJournal parser assrt f s = do + tc <- liftIO getClockTime + tl <- liftIO getCurrentLocalTime + y <- liftIO getCurrentYear + r <- runParserT parser nullctx{ctxYear=Just y} f s + case r of + Right (updates,ctx) -> do + j <- ap updates (return nulljournal) + case journalFinalise tc tl f s ctx assrt j of + Right j' -> return j' + Left estr -> throwError estr + Left e -> throwError $ show e + +setYear :: Monad m => Integer -> JournalParser m () +setYear y = modifyState (\ctx -> ctx{ctxYear=Just y}) + +getYear :: Monad m => JournalParser m (Maybe Integer) +getYear = fmap ctxYear getState + +setDefaultCommodityAndStyle :: Monad m => (CommoditySymbol,AmountStyle) -> JournalParser m () +setDefaultCommodityAndStyle cs = modifyState (\ctx -> ctx{ctxDefaultCommodityAndStyle=Just cs}) + +getDefaultCommodityAndStyle :: Monad m => JournalParser m (Maybe (CommoditySymbol,AmountStyle)) +getDefaultCommodityAndStyle = ctxDefaultCommodityAndStyle `fmap` getState + +pushAccount :: Monad m => String -> JournalParser m () +pushAccount acct = modifyState addAccount + where addAccount ctx0 = ctx0 { ctxAccounts = acct : ctxAccounts ctx0 } + +pushParentAccount :: Monad m => String -> JournalParser m () +pushParentAccount parent = modifyState addParentAccount + where addParentAccount ctx0 = ctx0 { ctxParentAccount = parent : ctxParentAccount ctx0 } + +popParentAccount :: Monad m => JournalParser m () +popParentAccount = do ctx0 <- getState + case ctxParentAccount ctx0 of + [] -> unexpected "End of apply account block with no beginning" + (_:rest) -> setState $ ctx0 { ctxParentAccount = rest } + +getParentAccount :: Monad m => JournalParser m String +getParentAccount = fmap (concatAccountNames . reverse . ctxParentAccount) getState + +addAccountAlias :: Monad m => AccountAlias -> JournalParser m () +addAccountAlias a = modifyState (\(ctx@Ctx{..}) -> ctx{ctxAliases=a:ctxAliases}) + +getAccountAliases :: Monad m => JournalParser m [AccountAlias] +getAccountAliases = fmap ctxAliases getState + +clearAccountAliases :: Monad m => JournalParser m () +clearAccountAliases = modifyState (\(ctx@Ctx{..}) -> ctx{ctxAliases=[]}) + +getIndex :: Monad m => JournalParser m Integer +getIndex = fmap ctxTransactionIndex getState + +setIndex :: Monad m => Integer -> JournalParser m () +setIndex i = modifyState (\ctx -> ctx{ctxTransactionIndex=i}) + +journalAddFile :: (FilePath,String) -> Journal -> Journal +journalAddFile f j@Journal{files=fs} = j{files=fs++[f]} + -- NOTE: first encountered file to left, to avoid a reverse + +-- -- | Terminate parsing entirely, returning the given error message +-- -- with the current parse position prepended. +-- parserError :: String -> ErroringJournalParser a +-- parserError s = do +-- pos <- getPosition +-- parserErrorAt pos s + +-- | Terminate parsing entirely, returning the given error message +-- with the given parse position prepended. +parserErrorAt :: SourcePos -> String -> ErroringJournalParser a +parserErrorAt pos s = throwError $ show pos ++ ":\n" ++ s + +--- * parsers +--- ** transaction bits + +statusp :: Monad m => JournalParser m ClearedStatus +statusp = + choice' + [ many spacenonewline >> char '*' >> return Cleared + , many spacenonewline >> char '!' >> return Pending + , return Uncleared + ] + "cleared status" + +codep :: Monad m => JournalParser m String +codep = try (do { many1 spacenonewline; char '(' "codep"; anyChar `manyTill` char ')' } ) <|> return "" + +descriptionp :: Monad m => JournalParser m String +descriptionp = many (noneOf ";\n") + +--- ** dates + +-- | Parse a date in YYYY/MM/DD format. +-- Hyphen (-) and period (.) are also allowed as separators. +-- The year may be omitted if a default year has been set. +-- Leading zeroes may be omitted. +datep :: Monad m => JournalParser m Day +datep = do + -- hacky: try to ensure precise errors for invalid dates + -- XXX reported error position is not too good + -- pos <- genericSourcePos <$> getPosition + datestr <- do + c <- digit + cs <- many $ choice' [digit, datesepchar] + return $ c:cs + let sepchars = nub $ sort $ filter (`elem` datesepchars) datestr + when (length sepchars /= 1) $ fail $ "bad date, different separators used: " ++ datestr + let dateparts = wordsBy (`elem` datesepchars) datestr + currentyear <- getYear + [y,m,d] <- case (dateparts,currentyear) of + ([m,d],Just y) -> return [show y,m,d] + ([_,_],Nothing) -> fail $ "partial date "++datestr++" found, but the current year is unknown" + ([y,m,d],_) -> return [y,m,d] + _ -> fail $ "bad date: " ++ datestr + let maybedate = fromGregorianValid (read y) (read m) (read d) + case maybedate of + Nothing -> fail $ "bad date: " ++ datestr + Just date -> return date + "full or partial date" + +-- | Parse a date and time in YYYY/MM/DD HH:MM[:SS][+-ZZZZ] format. +-- Hyphen (-) and period (.) are also allowed as date separators. +-- The year may be omitted if a default year has been set. +-- Seconds are optional. +-- The timezone is optional and ignored (the time is always interpreted as a local time). +-- Leading zeroes may be omitted (except in a timezone). +datetimep :: Monad m => JournalParser m LocalTime +datetimep = do + day <- datep + many1 spacenonewline + h <- many1 digit + let h' = read h + guard $ h' >= 0 && h' <= 23 + char ':' + m <- many1 digit + let m' = read m + guard $ m' >= 0 && m' <= 59 + s <- optionMaybe $ char ':' >> many1 digit + let s' = case s of Just sstr -> read sstr + Nothing -> 0 + guard $ s' >= 0 && s' <= 59 + {- tz <- -} + optionMaybe $ do + plusminus <- oneOf "-+" + d1 <- digit + d2 <- digit + d3 <- digit + d4 <- digit + return $ plusminus:d1:d2:d3:d4:"" + -- ltz <- liftIO $ getCurrentTimeZone + -- let tz' = maybe ltz (fromMaybe ltz . parseTime defaultTimeLocale "%z") tz + -- return $ localTimeToUTC tz' $ LocalTime day $ TimeOfDay h' m' (fromIntegral s') + return $ LocalTime day $ TimeOfDay h' m' (fromIntegral s') + +secondarydatep :: Monad m => Day -> JournalParser m Day +secondarydatep primarydate = do + char '=' + -- kludgy way to use primary date for default year + let withDefaultYear d p = do + y <- getYear + let (y',_,_) = toGregorian d in setYear y' + r <- p + when (isJust y) $ setYear $ fromJust y -- XXX + -- mapM setYear <$> y + return r + withDefaultYear primarydate datep + +-- | +-- >> parsewith twoorthreepartdatestringp "2016/01/2" +-- Right "2016/01/2" +-- twoorthreepartdatestringp = do +-- n1 <- many1 digit +-- c <- datesepchar +-- n2 <- many1 digit +-- mn3 <- optionMaybe $ char c >> many1 digit +-- return $ n1 ++ c:n2 ++ maybe "" (c:) mn3 + +--- ** account names + +-- | Parse an account name, then apply any parent account prefix and/or account aliases currently in effect. +modifiedaccountnamep :: Monad m => JournalParser m AccountName +modifiedaccountnamep = do + parent <- getParentAccount + aliases <- getAccountAliases + a <- accountnamep + return $ + accountNameApplyAliases aliases $ + -- XXX accountNameApplyAliasesMemo ? doesn't seem to make a difference + joinAccountNames parent + a + +-- | Parse an account name. Account names start with a non-space, may +-- have single spaces inside them, and are terminated by two or more +-- spaces (or end of input). Also they have one or more components of +-- at least one character, separated by the account separator char. +-- (This parser will also consume one following space, if present.) +accountnamep :: Monad m => StringParser u m AccountName +accountnamep = do + a <- do + c <- nonspace + cs <- striptrailingspace <$> many (nonspace <|> singlespace) + return $ c:cs + when (accountNameFromComponents (accountNameComponents a) /= a) + (fail $ "account name seems ill-formed: "++a) + return a + where + singlespace = try (do {spacenonewline; do {notFollowedBy spacenonewline; return ' '}}) + striptrailingspace "" = "" + striptrailingspace s = if last s == ' ' then init s else s + +-- accountnamechar = notFollowedBy (oneOf "()[]") >> nonspace +-- "account name character (non-bracket, non-parenthesis, non-whitespace)" + +--- ** amounts + +-- | Parse whitespace then an amount, with an optional left or right +-- currency symbol and optional price, or return the special +-- "missing" marker amount. +spaceandamountormissingp :: Monad m => JournalParser m MixedAmount +spaceandamountormissingp = + try (do + many1 spacenonewline + (Mixed . (:[])) `fmap` amountp <|> return missingmixedamt + ) <|> return missingmixedamt + +#ifdef TESTS +assertParseEqual' :: (Show a, Eq a) => (Either ParseError a) -> a -> Assertion +assertParseEqual' parse expected = either (assertFailure.show) (`is'` expected) parse + +is' :: (Eq a, Show a) => a -> a -> Assertion +a `is'` e = assertEqual e a + +test_spaceandamountormissingp = do + assertParseEqual' (parseWithCtx nullctx spaceandamountormissingp " $47.18") (Mixed [usd 47.18]) + assertParseEqual' (parseWithCtx nullctx spaceandamountormissingp "$47.18") missingmixedamt + assertParseEqual' (parseWithCtx nullctx spaceandamountormissingp " ") missingmixedamt + assertParseEqual' (parseWithCtx nullctx spaceandamountormissingp "") missingmixedamt +#endif + +-- | Parse a single-commodity amount, with optional symbol on the left or +-- right, optional unit or total price, and optional (ignored) +-- ledger-style balance assertion or fixed lot price declaration. +amountp :: Monad m => JournalParser m Amount +amountp = try leftsymbolamountp <|> try rightsymbolamountp <|> nosymbolamountp + +#ifdef TESTS +test_amountp = do + assertParseEqual' (parseWithCtx nullctx amountp "$47.18") (usd 47.18) + assertParseEqual' (parseWithCtx nullctx amountp "$1.") (usd 1 `withPrecision` 0) + -- ,"amount with unit price" ~: do + assertParseEqual' + (parseWithCtx nullctx amountp "$10 @ €0.5") + (usd 10 `withPrecision` 0 `at` (eur 0.5 `withPrecision` 1)) + -- ,"amount with total price" ~: do + assertParseEqual' + (parseWithCtx nullctx amountp "$10 @@ €5") + (usd 10 `withPrecision` 0 @@ (eur 5 `withPrecision` 0)) +#endif + +-- | Parse an amount from a string, or get an error. +amountp' :: String -> Amount +amountp' s = + case runParser (amountp <* eof) nullctx "" s of + Right t -> t + Left err -> error' $ show err -- XXX should throwError + +-- | Parse a mixed amount from a string, or get an error. +mamountp' :: String -> MixedAmount +mamountp' = Mixed . (:[]) . amountp' + +signp :: Monad m => JournalParser m String +signp = do + sign <- optionMaybe $ oneOf "+-" + return $ case sign of Just '-' -> "-" + _ -> "" + +leftsymbolamountp :: Monad m => JournalParser m Amount +leftsymbolamountp = do + sign <- signp + c <- commoditysymbolp + sp <- many spacenonewline + (q,prec,mdec,mgrps) <- numberp + let s = amountstyle{ascommodityside=L, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps} + p <- priceamountp + let applysign = if sign=="-" then negate else id + return $ applysign $ Amount c q p s + "left-symbol amount" + +rightsymbolamountp :: Monad m => JournalParser m Amount +rightsymbolamountp = do + (q,prec,mdec,mgrps) <- numberp + sp <- many spacenonewline + c <- commoditysymbolp + p <- priceamountp + let s = amountstyle{ascommodityside=R, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps} + return $ Amount c q p s + "right-symbol amount" + +nosymbolamountp :: Monad m => JournalParser m Amount +nosymbolamountp = do + (q,prec,mdec,mgrps) <- numberp + p <- priceamountp + -- apply the most recently seen default commodity and style to this commodityless amount + defcs <- getDefaultCommodityAndStyle + let (c,s) = case defcs of + Just (defc,defs) -> (defc, defs{asprecision=max (asprecision defs) prec}) + Nothing -> ("", amountstyle{asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps}) + return $ Amount c q p s + "no-symbol amount" + +commoditysymbolp :: Monad m => JournalParser m String +commoditysymbolp = (quotedcommoditysymbolp <|> simplecommoditysymbolp) "commodity symbol" + +quotedcommoditysymbolp :: Monad m => JournalParser m String +quotedcommoditysymbolp = do + char '"' + s <- many1 $ noneOf ";\n\"" + char '"' + return s + +simplecommoditysymbolp :: Monad m => JournalParser m String +simplecommoditysymbolp = many1 (noneOf nonsimplecommoditychars) + +priceamountp :: Monad m => JournalParser m Price +priceamountp = + try (do + many spacenonewline + char '@' + try (do + char '@' + many spacenonewline + a <- amountp -- XXX can parse more prices ad infinitum, shouldn't + return $ TotalPrice a) + <|> (do + many spacenonewline + a <- amountp -- XXX can parse more prices ad infinitum, shouldn't + return $ UnitPrice a)) + <|> return NoPrice + +partialbalanceassertionp :: Monad m => JournalParser m (Maybe MixedAmount) +partialbalanceassertionp = + try (do + many spacenonewline + char '=' + many spacenonewline + a <- amountp -- XXX should restrict to a simple amount + return $ Just $ Mixed [a]) + <|> return Nothing + +-- balanceassertion :: Monad m => JournalParser m (Maybe MixedAmount) +-- balanceassertion = +-- try (do +-- many spacenonewline +-- string "==" +-- many spacenonewline +-- a <- amountp -- XXX should restrict to a simple amount +-- return $ Just $ Mixed [a]) +-- <|> return Nothing + +-- http://ledger-cli.org/3.0/doc/ledger3.html#Fixing-Lot-Prices +fixedlotpricep :: Monad m => JournalParser m (Maybe Amount) +fixedlotpricep = + try (do + many spacenonewline + char '{' + many spacenonewline + char '=' + many spacenonewline + a <- amountp -- XXX should restrict to a simple amount + many spacenonewline + char '}' + return $ Just a) + <|> return Nothing + +-- | Parse a string representation of a number for its value and display +-- attributes. +-- +-- Some international number formats are accepted, eg either period or comma +-- may be used for the decimal point, and the other of these may be used for +-- separating digit groups in the integer part. See +-- http://en.wikipedia.org/wiki/Decimal_separator for more examples. +-- +-- This returns: the parsed numeric value, the precision (number of digits +-- seen following the decimal point), the decimal point character used if any, +-- and the digit group style if any. +-- +numberp :: Monad m => JournalParser m (Quantity, Int, Maybe Char, Maybe DigitGroupStyle) +numberp = do + -- a number is an optional sign followed by a sequence of digits possibly + -- interspersed with periods, commas, or both + -- ptrace "numberp" + sign <- signp + parts <- many1 $ choice' [many1 digit, many1 $ char ',', many1 $ char '.'] + dbg8 "numberp parsed" (sign,parts) `seq` return () + + -- check the number is well-formed and identify the decimal point and digit + -- group separator characters used, if any + let (numparts, puncparts) = partition numeric parts + (ok, mdecimalpoint, mseparator) = + case (numparts, puncparts) of + ([],_) -> (False, Nothing, Nothing) -- no digits, not ok + (_,[]) -> (True, Nothing, Nothing) -- digits with no punctuation, ok + (_,[[d]]) -> (True, Just d, Nothing) -- just a single punctuation of length 1, assume it's a decimal point + (_,[_]) -> (False, Nothing, Nothing) -- a single punctuation of some other length, not ok + (_,_:_:_) -> -- two or more punctuations + let (s:ss, d) = (init puncparts, last puncparts) -- the leftmost is a separator and the rightmost may be a decimal point + in if any ((/=1).length) puncparts -- adjacent punctuation chars, not ok + || any (s/=) ss -- separator chars vary, not ok + || head parts == s -- number begins with a separator char, not ok + then (False, Nothing, Nothing) + else if s == d + then (True, Nothing, Just $ head s) -- just one kind of punctuation - must be separators + else (True, Just $ head d, Just $ head s) -- separator(s) and a decimal point + unless ok $ fail $ "number seems ill-formed: "++concat parts + + -- get the digit group sizes and digit group style if any + let (intparts',fracparts') = span ((/= mdecimalpoint) . Just . head) parts + (intparts, fracpart) = (filter numeric intparts', filter numeric fracparts') + groupsizes = reverse $ case map length intparts of + (a:b:cs) | a < b -> b:cs + gs -> gs + mgrps = (`DigitGroups` groupsizes) <$> mseparator + + -- put the parts back together without digit group separators, get the precision and parse the value + let int = concat $ "":intparts + frac = concat $ "":fracpart + precision = length frac + int' = if null int then "0" else int + frac' = if null frac then "0" else frac + quantity = read $ sign++int'++"."++frac' -- this read should never fail + + return $ dbg8 "numberp quantity,precision,mdecimalpoint,mgrps" (quantity,precision,mdecimalpoint,mgrps) + "numberp" + where + numeric = isNumber . headDef '_' + +-- test_numberp = do +-- let s `is` n = assertParseEqual (parseWithCtx nullctx numberp s) n +-- assertFails = assertBool . isLeft . parseWithCtx nullctx numberp +-- assertFails "" +-- "0" `is` (0, 0, '.', ',', []) +-- "1" `is` (1, 0, '.', ',', []) +-- "1.1" `is` (1.1, 1, '.', ',', []) +-- "1,000.1" `is` (1000.1, 1, '.', ',', [3]) +-- "1.00.000,1" `is` (100000.1, 1, ',', '.', [3,2]) +-- "1,000,000" `is` (1000000, 0, '.', ',', [3,3]) +-- "1." `is` (1, 0, '.', ',', []) +-- "1," `is` (1, 0, ',', '.', []) +-- ".1" `is` (0.1, 1, '.', ',', []) +-- ",1" `is` (0.1, 1, ',', '.', []) +-- assertFails "1,000.000,1" +-- assertFails "1.000,000.1" +-- assertFails "1,000.000.1" +-- assertFails "1,,1" +-- assertFails "1..1" +-- assertFails ".1," +-- assertFails ",1." + +--- ** comments + +multilinecommentp :: Monad m => JournalParser m () +multilinecommentp = do + string "comment" >> many spacenonewline >> newline + go + where + go = try (eof <|> (string "end comment" >> newline >> return ())) + <|> (anyLine >> go) + anyLine = anyChar `manyTill` newline + +emptyorcommentlinep :: Monad m => JournalParser m () +emptyorcommentlinep = do + many spacenonewline >> (commentp <|> (many spacenonewline >> newline >> return "")) + return () + +-- | Parse a possibly multi-line comment following a semicolon. +followingcommentp :: Monad m => JournalParser m String +followingcommentp = + -- ptrace "followingcommentp" + do samelinecomment <- many spacenonewline >> (try semicoloncommentp <|> (newline >> return "")) + newlinecomments <- many (try (many1 spacenonewline >> semicoloncommentp)) + return $ unlines $ samelinecomment:newlinecomments + +-- | Parse a possibly multi-line comment following a semicolon, and +-- any tags and/or posting dates within it. Posting dates can be +-- expressed with "date"/"date2" tags and/or bracketed dates. The +-- dates are parsed in full here so that errors are reported in the +-- right position. Missing years can be inferred if a default date is +-- provided. +-- +-- >>> rejp (followingcommentandtagsp (Just $ fromGregorian 2000 1 2)) "; a:b, date:3/4, [=5/6]" +-- Right ("a:b, date:3/4, [=5/6]\n",[("a","b"),("date","3/4")],Just 2000-03-04,Just 2000-05-06) +-- +-- Year unspecified and no default provided -> unknown year error, at correct position: +-- >>> rejp (followingcommentandtagsp Nothing) " ; xxx date:3/4\n ; second line" +-- Left ...line 1, column 22...year is unknown... +-- +-- Date tag value contains trailing text - forgot the comma, confused: +-- the syntaxes ? We'll accept the leading date anyway +-- >>> rejp (followingcommentandtagsp (Just $ fromGregorian 2000 1 2)) "; date:3/4=5/6" +-- Right ("date:3/4=5/6\n",[("date","3/4=5/6")],Just 2000-03-04,Nothing) +-- +followingcommentandtagsp :: Maybe Day -> ErroringJournalParser (String, [Tag], Maybe Day, Maybe Day) +followingcommentandtagsp mdefdate = do + -- pdbg 0 "followingcommentandtagsp" + + -- Parse a single or multi-line comment, starting on this line or the next one. + -- Save the starting position and preserve all whitespace for the subsequent re-parsing, + -- to get good error positions. + startpos <- getPosition + commentandwhitespace <- do + let semicoloncommentp' = (:) <$> char ';' <*> anyChar `manyTill` eolof + sp1 <- many spacenonewline + l1 <- try semicoloncommentp' <|> (newline >> return "") + ls <- many $ try ((++) <$> many1 spacenonewline <*> semicoloncommentp') + return $ unlines $ (sp1 ++ l1) : ls + let comment = unlines $ map (lstrip . dropWhile (==';') . strip) $ lines commentandwhitespace + -- pdbg 0 $ "commentws:"++show commentandwhitespace + -- pdbg 0 $ "comment:"++show comment + + -- Reparse the comment for any tags. + tags <- case runStringParser (setPosition startpos >> tagsp) commentandwhitespace of + Right ts -> return ts + Left e -> throwError $ show e + -- pdbg 0 $ "tags: "++show tags + + -- Reparse the comment for any posting dates. Use the transaction date for defaults, if provided. + epdates <- liftIO $ rejp (setPosition startpos >> postingdatesp mdefdate) commentandwhitespace + pdates <- case epdates of + Right ds -> return ds + Left e -> throwError e + -- pdbg 0 $ "pdates: "++show pdates + let mdate = headMay $ map snd $ filter ((=="date").fst) pdates + mdate2 = headMay $ map snd $ filter ((=="date2").fst) pdates + + return (comment, tags, mdate, mdate2) + +commentp :: Monad m => JournalParser m String +commentp = commentStartingWithp commentchars + +commentchars :: [Char] +commentchars = "#;*" + +semicoloncommentp :: Monad m => JournalParser m String +semicoloncommentp = commentStartingWithp ";" + +commentStartingWithp :: Monad m => String -> JournalParser m String +commentStartingWithp cs = do + -- ptrace "commentStartingWith" + oneOf cs + many spacenonewline + l <- anyChar `manyTill` eolof + optional newline + return l + +--- ** tags + +-- | Extract any tags (name:value ended by comma or newline) embedded in a string. +-- +-- >>> commentTags "a b:, c:c d:d, e" +-- [("b",""),("c","c d:d")] +-- +-- >>> commentTags "a [1/1/1] [1/1] [1], [=1/1/1] [=1/1] [=1] [1/1=1/1/1] [1=1/1/1] b:c" +-- [("b","c")] +-- +-- --[("date","1/1/1"),("date","1/1"),("date2","1/1/1"),("date2","1/1"),("date","1/1"),("date2","1/1/1"),("date","1"),("date2","1/1/1")] +-- +-- >>> commentTags "\na b:, \nd:e, f" +-- [("b",""),("d","e")] +-- +commentTags :: String -> [Tag] +commentTags s = + case runStringParser tagsp s of + Right r -> r + Left _ -> [] -- shouldn't happen + +-- | Parse all tags found in a string. +tagsp :: StringParser u Identity [Tag] +tagsp = -- do + -- pdbg 0 $ "tagsp" + many (try (nontagp >> tagp)) + +-- | Parse everything up till the first tag. +-- +-- >>> rsp nontagp "\na b:, \nd:e, f" +-- Right "\na " +nontagp :: StringParser u Identity String +nontagp = -- do + -- pdbg 0 "nontagp" + -- anyChar `manyTill` (lookAhead (try (tagorbracketeddatetagsp Nothing >> return ()) <|> eof)) + anyChar `manyTill` lookAhead (try (void tagp) <|> eof) + -- XXX costly ? + +-- | Tags begin with a colon-suffixed tag name (a word beginning with +-- a letter) and are followed by a tag value (any text up to a comma +-- or newline, whitespace-stripped). +-- +-- >>> rsp tagp "a:b b , c AuxDate: 4/2" +-- Right ("a","b b") +-- +tagp :: Monad m => StringParser u m Tag +tagp = do + -- pdbg 0 "tagp" + n <- tagnamep + v <- tagvaluep + return (n,v) + +-- | +-- >>> rsp tagnamep "a:" +-- Right "a" +tagnamep :: Monad m => StringParser u m String +tagnamep = -- do + -- pdbg 0 "tagnamep" + many1 (noneOf ": \t\n") <* char ':' + +tagvaluep :: Monad m => StringParser u m String +tagvaluep = do + -- ptrace "tagvalue" + v <- anyChar `manyTill` (void (try (char ',')) <|> eolof) + return $ strip $ reverse $ dropWhile (==',') $ reverse $ strip v + +--- ** posting dates + +-- | Parse all posting dates found in a string. Posting dates can be +-- expressed with date/date2 tags and/or bracketed dates. The dates +-- are parsed fully to give useful errors. Missing years can be +-- inferred only if a default date is provided. +-- +postingdatesp :: Maybe Day -> ErroringJournalParser [(TagName,Day)] +postingdatesp mdefdate = do + -- pdbg 0 $ "postingdatesp" + let p = ((:[]) <$> datetagp mdefdate) <|> bracketeddatetagsp mdefdate + nonp = + many (notFollowedBy p >> anyChar) + -- anyChar `manyTill` (lookAhead (try (p >> return ()) <|> eof)) + concat <$> many (try (nonp >> p)) + +--- ** date tags + +-- | Date tags are tags with name "date" or "date2". Their value is +-- parsed as a date, using the provided default date if any for +-- inferring a missing year if needed. Any error in date parsing is +-- reported and terminates parsing. +-- +-- >>> rejp (datetagp Nothing) "date: 2000/1/2 " +-- Right ("date",2000-01-02) +-- +-- >>> rejp (datetagp (Just $ fromGregorian 2001 2 3)) "date2:3/4" +-- Right ("date2",2001-03-04) +-- +-- >>> rejp (datetagp Nothing) "date: 3/4" +-- Left ...line 1, column 9...year is unknown... +-- +datetagp :: Maybe Day -> ErroringJournalParser (TagName,Day) +datetagp mdefdate = do + -- pdbg 0 "datetagp" + string "date" + n <- fromMaybe "" <$> optionMaybe (string "2") + char ':' + startpos <- getPosition + v <- tagvaluep + -- re-parse value as a date. + ctx <- getState + ep <- parseWithCtx + ctx{ctxYear=first3.toGregorian <$> mdefdate} + -- The value extends to a comma, newline, or end of file. + -- It seems like ignoring any extra stuff following a date + -- gives better errors here. + (do + setPosition startpos + datep) -- <* eof) + v + case ep + of Left e -> throwError $ show e + Right d -> return ("date"++n, d) + +--- ** bracketed dates + +-- tagorbracketeddatetagsp :: Monad m => Maybe Day -> StringParser u m [Tag] +-- tagorbracketeddatetagsp mdefdate = +-- bracketeddatetagsp mdefdate <|> ((:[]) <$> tagp) + +-- | Parse Ledger-style bracketed posting dates ([DATE=DATE2]), as +-- "date" and/or "date2" tags. Anything that looks like an attempt at +-- this (a square-bracketed sequence of 0123456789/-.= containing at +-- least one digit and one date separator) is also parsed, and will +-- throw an appropriate error. +-- +-- The dates are parsed in full here so that errors are reported in +-- the right position. A missing year in DATE can be inferred if a +-- default date is provided. A missing year in DATE2 will be inferred +-- from DATE. +-- +-- >>> rejp (bracketeddatetagsp Nothing) "[2016/1/2=3/4]" +-- Right [("date",2016-01-02),("date2",2016-03-04)] +-- +-- >>> rejp (bracketeddatetagsp Nothing) "[1]" +-- Left ...not a bracketed date... +-- +-- >>> rejp (bracketeddatetagsp Nothing) "[2016/1/32]" +-- Left ...line 1, column 11...bad date... +-- +-- >>> rejp (bracketeddatetagsp Nothing) "[1/31]" +-- Left ...line 1, column 6...year is unknown... +-- +-- >>> rejp (bracketeddatetagsp Nothing) "[0123456789/-.=/-.=]" +-- Left ...line 1, column 15...bad date, different separators... +-- +bracketeddatetagsp :: Maybe Day -> ErroringJournalParser [(TagName, Day)] +bracketeddatetagsp mdefdate = do + -- pdbg 0 "bracketeddatetagsp" + char '[' + startpos <- getPosition + let digits = "0123456789" + s <- many1 (oneOf $ '=':digits++datesepchars) + char ']' + unless (any (`elem` s) digits && any (`elem` datesepchars) s) $ + parserFail "not a bracketed date" + + -- looks sufficiently like a bracketed date, now we + -- re-parse as dates and throw any errors + ctx <- getState + ep <- parseWithCtx + ctx{ctxYear=first3.toGregorian <$> mdefdate} + (do + setPosition startpos + md1 <- optionMaybe datep + maybe (return ()) (setYear.first3.toGregorian) md1 + md2 <- optionMaybe $ char '=' >> datep + eof + return (md1,md2) + ) + s + case ep + of Left e -> throwError $ show e + Right (md1,md2) -> return $ catMaybes + [("date",) <$> md1, ("date2",) <$> md2] + diff --git a/hledger-lib/Hledger/Read/CsvReader.hs b/hledger-lib/Hledger/Read/CsvReader.hs index cdf4f353d..842e741ed 100644 --- a/hledger-lib/Hledger/Read/CsvReader.hs +++ b/hledger-lib/Hledger/Read/CsvReader.hs @@ -51,7 +51,7 @@ import Text.Printf (hPrintf,printf) import Hledger.Data import Hledger.Utils.UTF8IOCompat (getContents) import Hledger.Utils -import Hledger.Read.JournalReader (amountp, statusp, genericSourcePos) +import Hledger.Read.Common (amountp, statusp, genericSourcePos) reader :: Reader diff --git a/hledger-lib/Hledger/Read/JournalReader.hs b/hledger-lib/Hledger/Read/JournalReader.hs index 20128cc85..dabeb95df 100644 --- a/hledger-lib/Hledger/Read/JournalReader.hs +++ b/hledger-lib/Hledger/Read/JournalReader.hs @@ -19,12 +19,16 @@ reader should handle many ledger files as well. Example: assets:cash @ +Journal format supports the include directive which can read files in +other formats, so the other file format readers need to be importable +here. Some low-level journal syntax parsers which those readers also +use are therefore defined separately in Hledger.Read.Common, avoiding +import cycles. + -} --- * module --- {-# OPTIONS_GHC -F -pgmF htfpp #-} - {-# LANGUAGE CPP, RecordWildCards, NamedFieldPuns, NoMonoLocalBinds, ScopedTypeVariables, FlexibleContexts, TupleSections #-} module Hledger.Read.JournalReader ( @@ -64,27 +68,18 @@ module Hledger.Read.JournalReader ( emptyorcommentlinep, followingcommentp, accountaliasp + -- * Tests ,tests_Hledger_Read_JournalReader -#ifdef TESTS - -- disabled by default, HTF not available on windows - ,htf_thisModulesTests - ,htf_Hledger_Read_JournalReader_importedTests -#endif + ) where --- * imports import Prelude () import Prelude.Compat hiding (readFile) import qualified Control.Exception as C -import Control.Monad.Compat import Control.Monad.Except (ExceptT(..), liftIO, runExceptT, throwError, catchError) -import Data.Char (isNumber) -import Data.Functor.Identity -import Data.List.Compat -import Data.List.Split (wordsBy) import qualified Data.Map.Strict as M -import Data.Maybe import Data.Time.Calendar import Data.Time.LocalTime import Safe @@ -96,9 +91,11 @@ import Text.Parsec.Error import Text.Parsec hiding (parse) import Text.Printf import System.FilePath -import System.Time (getClockTime) import Hledger.Data +import Hledger.Read.Common +import Hledger.Read.TimeclockReader (timeclockfilep) +import Hledger.Read.TimedotReader (timedotfilep) import Hledger.Utils @@ -121,154 +118,6 @@ detect f s parse :: Maybe FilePath -> Bool -> FilePath -> String -> ExceptT String IO Journal parse _ = parseAndFinaliseJournal journalp ---- * parsing utils - --- | A parser of strings with generic user state, monad and return type. -type StringParser u m a = ParsecT String u m a - --- | A string parser with journal-parsing state. -type JournalParser m a = StringParser JournalContext m a - --- | A journal parser that runs in IO and can throw an error mid-parse. -type ErroringJournalParser a = JournalParser (ExceptT String IO) a - --- | Run a string parser with no state in the identity monad. -runStringParser, rsp :: StringParser () Identity a -> String -> Either ParseError a -runStringParser p s = runIdentity $ runParserT p () "" s -rsp = runStringParser - --- | Run a journal parser with a null journal-parsing state. -runJournalParser, rjp :: Monad m => JournalParser m a -> String -> m (Either ParseError a) -runJournalParser p s = runParserT p nullctx "" s -rjp = runJournalParser - --- | Run an error-raising journal parser with a null journal-parsing state. -runErroringJournalParser, rejp :: ErroringJournalParser a -> String -> IO (Either String a) -runErroringJournalParser p s = runExceptT $ runJournalParser p s >>= either (throwError.show) return -rejp = runErroringJournalParser - -genericSourcePos :: SourcePos -> GenericSourcePos -genericSourcePos p = GenericSourcePos (sourceName p) (sourceLine p) (sourceColumn p) - --- | Flatten a list of JournalUpdate's (journal-transforming --- monadic actions which can do IO or raise an exception) into a --- single equivalent action. -combineJournalUpdates :: [JournalUpdate] -> JournalUpdate -combineJournalUpdates us = foldl' (flip (.)) id <$> sequence us --- XXX may be contributing to excessive stack use - --- cf http://neilmitchell.blogspot.co.uk/2015/09/detecting-space-leaks.html --- $ ./devprof +RTS -K576K -xc --- Exception (reporting due to +RTS -xc): (THUNK_STATIC), stack trace: --- Hledger.Read.JournalReader.combineJournalUpdates.\, --- called from Hledger.Read.JournalReader.combineJournalUpdates, --- called from Hledger.Read.JournalReader.fixedlotprice, --- called from Hledger.Read.JournalReader.partialbalanceassertion, --- called from Hledger.Read.JournalReader.getDefaultCommodityAndStyle, --- called from Hledger.Read.JournalReader.priceamount, --- called from Hledger.Read.JournalReader.nosymbolamount, --- called from Hledger.Read.JournalReader.numberp, --- called from Hledger.Read.JournalReader.rightsymbolamount, --- called from Hledger.Read.JournalReader.simplecommoditysymbol, --- called from Hledger.Read.JournalReader.quotedcommoditysymbol, --- called from Hledger.Read.JournalReader.commoditysymbol, --- called from Hledger.Read.JournalReader.signp, --- called from Hledger.Read.JournalReader.leftsymbolamount, --- called from Hledger.Read.JournalReader.amountp, --- called from Hledger.Read.JournalReader.spaceandamountormissing, --- called from Hledger.Read.JournalReader.accountnamep.singlespace, --- called from Hledger.Utils.Parse.nonspace, --- called from Hledger.Read.JournalReader.accountnamep, --- called from Hledger.Read.JournalReader.getAccountAliases, --- called from Hledger.Read.JournalReader.getParentAccount, --- called from Hledger.Read.JournalReader.modifiedaccountnamep, --- called from Hledger.Read.JournalReader.postingp, --- called from Hledger.Read.JournalReader.postings, --- called from Hledger.Read.JournalReader.commentStartingWith, --- called from Hledger.Read.JournalReader.semicoloncomment, --- called from Hledger.Read.JournalReader.followingcommentp, --- called from Hledger.Read.JournalReader.descriptionp, --- called from Hledger.Read.JournalReader.codep, --- called from Hledger.Read.JournalReader.statusp, --- called from Hledger.Utils.Parse.spacenonewline, --- called from Hledger.Read.JournalReader.secondarydatep, --- called from Hledger.Data.Dates.datesepchar, --- called from Hledger.Read.JournalReader.datep, --- called from Hledger.Read.JournalReader.transaction, --- called from Hledger.Utils.Parse.choice', --- called from Hledger.Read.JournalReader.directive, --- called from Hledger.Read.JournalReader.emptyorcommentlinep, --- called from Hledger.Read.JournalReader.multilinecommentp, --- called from Hledger.Read.JournalReader.journal.journalItem, --- called from Hledger.Read.JournalReader.journal, --- called from Hledger.Read.JournalReader.parseJournalWith, --- called from Hledger.Read.readJournal.tryReaders.firstSuccessOrBestError, --- called from Hledger.Read.readJournal.tryReaders, --- called from Hledger.Read.readJournal, --- called from Main.main, --- called from Main.CAF --- Stack space overflow: current size 33568 bytes. - --- | Given a JournalUpdate-generating parsec parser, file path and data string, --- parse and post-process a Journal so that it's ready to use, or give an error. -parseAndFinaliseJournal :: ErroringJournalParser (JournalUpdate,JournalContext) -> Bool -> FilePath -> String -> ExceptT String IO Journal -parseAndFinaliseJournal parser assrt f s = do - tc <- liftIO getClockTime - tl <- liftIO getCurrentLocalTime - y <- liftIO getCurrentYear - r <- runParserT parser nullctx{ctxYear=Just y} f s - case r of - Right (updates,ctx) -> do - j <- ap updates (return nulljournal) - case journalFinalise tc tl f s ctx assrt j of - Right j' -> return j' - Left estr -> throwError estr - Left e -> throwError $ show e - -setYear :: Monad m => Integer -> JournalParser m () -setYear y = modifyState (\ctx -> ctx{ctxYear=Just y}) - -getYear :: Monad m => JournalParser m (Maybe Integer) -getYear = fmap ctxYear getState - -setDefaultCommodityAndStyle :: Monad m => (CommoditySymbol,AmountStyle) -> JournalParser m () -setDefaultCommodityAndStyle cs = modifyState (\ctx -> ctx{ctxDefaultCommodityAndStyle=Just cs}) - -getDefaultCommodityAndStyle :: Monad m => JournalParser m (Maybe (CommoditySymbol,AmountStyle)) -getDefaultCommodityAndStyle = ctxDefaultCommodityAndStyle `fmap` getState - -pushAccount :: Monad m => String -> JournalParser m () -pushAccount acct = modifyState addAccount - where addAccount ctx0 = ctx0 { ctxAccounts = acct : ctxAccounts ctx0 } - -pushParentAccount :: Monad m => String -> JournalParser m () -pushParentAccount parent = modifyState addParentAccount - where addParentAccount ctx0 = ctx0 { ctxParentAccount = parent : ctxParentAccount ctx0 } - -popParentAccount :: Monad m => JournalParser m () -popParentAccount = do ctx0 <- getState - case ctxParentAccount ctx0 of - [] -> unexpected "End of apply account block with no beginning" - (_:rest) -> setState $ ctx0 { ctxParentAccount = rest } - -getParentAccount :: Monad m => JournalParser m String -getParentAccount = fmap (concatAccountNames . reverse . ctxParentAccount) getState - -addAccountAlias :: Monad m => AccountAlias -> JournalParser m () -addAccountAlias a = modifyState (\(ctx@Ctx{..}) -> ctx{ctxAliases=a:ctxAliases}) - -getAccountAliases :: Monad m => JournalParser m [AccountAlias] -getAccountAliases = fmap ctxAliases getState - -clearAccountAliases :: Monad m => JournalParser m () -clearAccountAliases = modifyState (\(ctx@Ctx{..}) -> ctx{ctxAliases=[]}) - -getIndex :: Monad m => JournalParser m Integer -getIndex = fmap ctxTransactionIndex getState - -setIndex :: Monad m => Integer -> JournalParser m () -setIndex i = modifyState (\ctx -> ctx{ctxTransactionIndex=i}) - --- * parsers --- ** journal @@ -325,11 +174,18 @@ includedirectivep = do outerState <- getState outerPos <- getPosition let curdir = takeDirectory (sourceName outerPos) + -- XXX clean this up, probably after getting rid of JournalUpdate let (u::ExceptT String IO (Journal -> Journal, JournalContext)) = do filepath <- expandPath curdir filename txt <- readFileOrError outerPos filepath let inIncluded = show outerPos ++ " in included file " ++ show filename ++ ":\n" - r <- runParserT journalp outerState filepath txt + r <- runParserT + (choice' [journalp + ,timeclockfilep + ,timedotfilep + -- can't include a csv file yet, that reader is special + ]) + outerState filepath txt case r of Right (ju, ctx) -> do @@ -346,12 +202,6 @@ includedirectivep = do Left err -> return $ throwError err Right (ju, _finalparsectx) -> return $ ExceptT $ return $ Right ju -journalAddFile :: (FilePath,String) -> Journal -> Journal -journalAddFile f j@Journal{files=fs} = j{files=fs++[f]} - -- NOTE: first encountered file to left, to avoid a reverse - -indentedlinep = many1 spacenonewline >> (rstrip <$> restofline) - accountdirectivep :: ErroringJournalParser JournalUpdate accountdirectivep = do string "account" @@ -362,17 +212,7 @@ accountdirectivep = do pushAccount acct return $ ExceptT $ return $ Right id --- -- | Terminate parsing entirely, returning the given error message --- -- with the current parse position prepended. --- parserError :: String -> ErroringJournalParser a --- parserError s = do --- pos <- getPosition --- parserErrorAt pos s - --- | Terminate parsing entirely, returning the given error message --- with the given parse position prepended. -parserErrorAt :: SourcePos -> String -> ErroringJournalParser a -parserErrorAt pos s = throwError $ show pos ++ ":\n" ++ s +indentedlinep = many1 spacenonewline >> (rstrip <$> restofline) -- | Parse a one-line or multi-line commodity directive. -- @@ -673,107 +513,6 @@ test_transactionp = do assertEqual 2 (let Right t = p in length $ tpostings t) #endif -statusp :: Monad m => JournalParser m ClearedStatus -statusp = - choice' - [ many spacenonewline >> char '*' >> return Cleared - , many spacenonewline >> char '!' >> return Pending - , return Uncleared - ] - "cleared status" - -codep :: Monad m => JournalParser m String -codep = try (do { many1 spacenonewline; char '(' "codep"; anyChar `manyTill` char ')' } ) <|> return "" - -descriptionp = many (noneOf ";\n") - ---- ** dates - --- | Parse a date in YYYY/MM/DD format. --- Hyphen (-) and period (.) are also allowed as separators. --- The year may be omitted if a default year has been set. --- Leading zeroes may be omitted. -datep :: Monad m => JournalParser m Day -datep = do - -- hacky: try to ensure precise errors for invalid dates - -- XXX reported error position is not too good - -- pos <- genericSourcePos <$> getPosition - datestr <- do - c <- digit - cs <- many $ choice' [digit, datesepchar] - return $ c:cs - let sepchars = nub $ sort $ filter (`elem` datesepchars) datestr - when (length sepchars /= 1) $ fail $ "bad date, different separators used: " ++ datestr - let dateparts = wordsBy (`elem` datesepchars) datestr - currentyear <- getYear - [y,m,d] <- case (dateparts,currentyear) of - ([m,d],Just y) -> return [show y,m,d] - ([_,_],Nothing) -> fail $ "partial date "++datestr++" found, but the current year is unknown" - ([y,m,d],_) -> return [y,m,d] - _ -> fail $ "bad date: " ++ datestr - let maybedate = fromGregorianValid (read y) (read m) (read d) - case maybedate of - Nothing -> fail $ "bad date: " ++ datestr - Just date -> return date - "full or partial date" - --- | Parse a date and time in YYYY/MM/DD HH:MM[:SS][+-ZZZZ] format. --- Hyphen (-) and period (.) are also allowed as date separators. --- The year may be omitted if a default year has been set. --- Seconds are optional. --- The timezone is optional and ignored (the time is always interpreted as a local time). --- Leading zeroes may be omitted (except in a timezone). -datetimep :: Monad m => JournalParser m LocalTime -datetimep = do - day <- datep - many1 spacenonewline - h <- many1 digit - let h' = read h - guard $ h' >= 0 && h' <= 23 - char ':' - m <- many1 digit - let m' = read m - guard $ m' >= 0 && m' <= 59 - s <- optionMaybe $ char ':' >> many1 digit - let s' = case s of Just sstr -> read sstr - Nothing -> 0 - guard $ s' >= 0 && s' <= 59 - {- tz <- -} - optionMaybe $ do - plusminus <- oneOf "-+" - d1 <- digit - d2 <- digit - d3 <- digit - d4 <- digit - return $ plusminus:d1:d2:d3:d4:"" - -- ltz <- liftIO $ getCurrentTimeZone - -- let tz' = maybe ltz (fromMaybe ltz . parseTime defaultTimeLocale "%z") tz - -- return $ localTimeToUTC tz' $ LocalTime day $ TimeOfDay h' m' (fromIntegral s') - return $ LocalTime day $ TimeOfDay h' m' (fromIntegral s') - -secondarydatep :: Monad m => Day -> JournalParser m Day -secondarydatep primarydate = do - char '=' - -- kludgy way to use primary date for default year - let withDefaultYear d p = do - y <- getYear - let (y',_,_) = toGregorian d in setYear y' - r <- p - when (isJust y) $ setYear $ fromJust y -- XXX - -- mapM setYear <$> y - return r - withDefaultYear primarydate datep - --- | --- >> parsewith twoorthreepartdatestringp "2016/01/2" --- Right "2016/01/2" --- twoorthreepartdatestringp = do --- n1 <- many1 digit --- c <- datesepchar --- n2 <- many1 digit --- mn3 <- optionMaybe $ char c >> many1 digit --- return $ n1 ++ c:n2 ++ maybe "" (c:) mn3 - --- ** postings -- Parse the following whitespace-beginning lines as postings, posting @@ -861,566 +600,6 @@ test_postingp = do -- assertEqual (Just nullmixedamt) (pbalanceassertion p) #endif ---- ** account names - --- | Parse an account name, then apply any parent account prefix and/or account aliases currently in effect. -modifiedaccountnamep :: Monad m => JournalParser m AccountName -modifiedaccountnamep = do - parent <- getParentAccount - aliases <- getAccountAliases - a <- accountnamep - return $ - accountNameApplyAliases aliases $ - -- XXX accountNameApplyAliasesMemo ? doesn't seem to make a difference - joinAccountNames parent - a - --- | Parse an account name. Account names start with a non-space, may --- have single spaces inside them, and are terminated by two or more --- spaces (or end of input). Also they have one or more components of --- at least one character, separated by the account separator char. --- (This parser will also consume one following space, if present.) -accountnamep :: Monad m => StringParser u m AccountName -accountnamep = do - a <- do - c <- nonspace - cs <- striptrailingspace <$> many (nonspace <|> singlespace) - return $ c:cs - when (accountNameFromComponents (accountNameComponents a) /= a) - (fail $ "account name seems ill-formed: "++a) - return a - where - singlespace = try (do {spacenonewline; do {notFollowedBy spacenonewline; return ' '}}) - striptrailingspace "" = "" - striptrailingspace s = if last s == ' ' then init s else s - --- accountnamechar = notFollowedBy (oneOf "()[]") >> nonspace --- "account name character (non-bracket, non-parenthesis, non-whitespace)" - ---- ** amounts - --- | Parse whitespace then an amount, with an optional left or right --- currency symbol and optional price, or return the special --- "missing" marker amount. -spaceandamountormissingp :: Monad m => JournalParser m MixedAmount -spaceandamountormissingp = - try (do - many1 spacenonewline - (Mixed . (:[])) `fmap` amountp <|> return missingmixedamt - ) <|> return missingmixedamt - -#ifdef TESTS -assertParseEqual' :: (Show a, Eq a) => (Either ParseError a) -> a -> Assertion -assertParseEqual' parse expected = either (assertFailure.show) (`is'` expected) parse - -is' :: (Eq a, Show a) => a -> a -> Assertion -a `is'` e = assertEqual e a - -test_spaceandamountormissingp = do - assertParseEqual' (parseWithCtx nullctx spaceandamountormissingp " $47.18") (Mixed [usd 47.18]) - assertParseEqual' (parseWithCtx nullctx spaceandamountormissingp "$47.18") missingmixedamt - assertParseEqual' (parseWithCtx nullctx spaceandamountormissingp " ") missingmixedamt - assertParseEqual' (parseWithCtx nullctx spaceandamountormissingp "") missingmixedamt -#endif - --- | Parse a single-commodity amount, with optional symbol on the left or --- right, optional unit or total price, and optional (ignored) --- ledger-style balance assertion or fixed lot price declaration. -amountp :: Monad m => JournalParser m Amount -amountp = try leftsymbolamountp <|> try rightsymbolamountp <|> nosymbolamountp - -#ifdef TESTS -test_amountp = do - assertParseEqual' (parseWithCtx nullctx amountp "$47.18") (usd 47.18) - assertParseEqual' (parseWithCtx nullctx amountp "$1.") (usd 1 `withPrecision` 0) - -- ,"amount with unit price" ~: do - assertParseEqual' - (parseWithCtx nullctx amountp "$10 @ €0.5") - (usd 10 `withPrecision` 0 `at` (eur 0.5 `withPrecision` 1)) - -- ,"amount with total price" ~: do - assertParseEqual' - (parseWithCtx nullctx amountp "$10 @@ €5") - (usd 10 `withPrecision` 0 @@ (eur 5 `withPrecision` 0)) -#endif - --- | Parse an amount from a string, or get an error. -amountp' :: String -> Amount -amountp' s = - case runParser (amountp <* eof) nullctx "" s of - Right t -> t - Left err -> error' $ show err -- XXX should throwError - --- | Parse a mixed amount from a string, or get an error. -mamountp' :: String -> MixedAmount -mamountp' = Mixed . (:[]) . amountp' - -signp :: Monad m => JournalParser m String -signp = do - sign <- optionMaybe $ oneOf "+-" - return $ case sign of Just '-' -> "-" - _ -> "" - -leftsymbolamountp :: Monad m => JournalParser m Amount -leftsymbolamountp = do - sign <- signp - c <- commoditysymbolp - sp <- many spacenonewline - (q,prec,mdec,mgrps) <- numberp - let s = amountstyle{ascommodityside=L, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps} - p <- priceamountp - let applysign = if sign=="-" then negate else id - return $ applysign $ Amount c q p s - "left-symbol amount" - -rightsymbolamountp :: Monad m => JournalParser m Amount -rightsymbolamountp = do - (q,prec,mdec,mgrps) <- numberp - sp <- many spacenonewline - c <- commoditysymbolp - p <- priceamountp - let s = amountstyle{ascommodityside=R, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps} - return $ Amount c q p s - "right-symbol amount" - -nosymbolamountp :: Monad m => JournalParser m Amount -nosymbolamountp = do - (q,prec,mdec,mgrps) <- numberp - p <- priceamountp - -- apply the most recently seen default commodity and style to this commodityless amount - defcs <- getDefaultCommodityAndStyle - let (c,s) = case defcs of - Just (defc,defs) -> (defc, defs{asprecision=max (asprecision defs) prec}) - Nothing -> ("", amountstyle{asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps}) - return $ Amount c q p s - "no-symbol amount" - -commoditysymbolp :: Monad m => JournalParser m String -commoditysymbolp = (quotedcommoditysymbolp <|> simplecommoditysymbolp) "commodity symbol" - -quotedcommoditysymbolp :: Monad m => JournalParser m String -quotedcommoditysymbolp = do - char '"' - s <- many1 $ noneOf ";\n\"" - char '"' - return s - -simplecommoditysymbolp :: Monad m => JournalParser m String -simplecommoditysymbolp = many1 (noneOf nonsimplecommoditychars) - -priceamountp :: Monad m => JournalParser m Price -priceamountp = - try (do - many spacenonewline - char '@' - try (do - char '@' - many spacenonewline - a <- amountp -- XXX can parse more prices ad infinitum, shouldn't - return $ TotalPrice a) - <|> (do - many spacenonewline - a <- amountp -- XXX can parse more prices ad infinitum, shouldn't - return $ UnitPrice a)) - <|> return NoPrice - -partialbalanceassertionp :: Monad m => JournalParser m (Maybe MixedAmount) -partialbalanceassertionp = - try (do - many spacenonewline - char '=' - many spacenonewline - a <- amountp -- XXX should restrict to a simple amount - return $ Just $ Mixed [a]) - <|> return Nothing - --- balanceassertion :: Monad m => JournalParser m (Maybe MixedAmount) --- balanceassertion = --- try (do --- many spacenonewline --- string "==" --- many spacenonewline --- a <- amountp -- XXX should restrict to a simple amount --- return $ Just $ Mixed [a]) --- <|> return Nothing - --- http://ledger-cli.org/3.0/doc/ledger3.html#Fixing-Lot-Prices -fixedlotpricep :: Monad m => JournalParser m (Maybe Amount) -fixedlotpricep = - try (do - many spacenonewline - char '{' - many spacenonewline - char '=' - many spacenonewline - a <- amountp -- XXX should restrict to a simple amount - many spacenonewline - char '}' - return $ Just a) - <|> return Nothing - --- | Parse a string representation of a number for its value and display --- attributes. --- --- Some international number formats are accepted, eg either period or comma --- may be used for the decimal point, and the other of these may be used for --- separating digit groups in the integer part. See --- http://en.wikipedia.org/wiki/Decimal_separator for more examples. --- --- This returns: the parsed numeric value, the precision (number of digits --- seen following the decimal point), the decimal point character used if any, --- and the digit group style if any. --- -numberp :: Monad m => JournalParser m (Quantity, Int, Maybe Char, Maybe DigitGroupStyle) -numberp = do - -- a number is an optional sign followed by a sequence of digits possibly - -- interspersed with periods, commas, or both - -- ptrace "numberp" - sign <- signp - parts <- many1 $ choice' [many1 digit, many1 $ char ',', many1 $ char '.'] - dbg8 "numberp parsed" (sign,parts) `seq` return () - - -- check the number is well-formed and identify the decimal point and digit - -- group separator characters used, if any - let (numparts, puncparts) = partition numeric parts - (ok, mdecimalpoint, mseparator) = - case (numparts, puncparts) of - ([],_) -> (False, Nothing, Nothing) -- no digits, not ok - (_,[]) -> (True, Nothing, Nothing) -- digits with no punctuation, ok - (_,[[d]]) -> (True, Just d, Nothing) -- just a single punctuation of length 1, assume it's a decimal point - (_,[_]) -> (False, Nothing, Nothing) -- a single punctuation of some other length, not ok - (_,_:_:_) -> -- two or more punctuations - let (s:ss, d) = (init puncparts, last puncparts) -- the leftmost is a separator and the rightmost may be a decimal point - in if any ((/=1).length) puncparts -- adjacent punctuation chars, not ok - || any (s/=) ss -- separator chars vary, not ok - || head parts == s -- number begins with a separator char, not ok - then (False, Nothing, Nothing) - else if s == d - then (True, Nothing, Just $ head s) -- just one kind of punctuation - must be separators - else (True, Just $ head d, Just $ head s) -- separator(s) and a decimal point - unless ok $ fail $ "number seems ill-formed: "++concat parts - - -- get the digit group sizes and digit group style if any - let (intparts',fracparts') = span ((/= mdecimalpoint) . Just . head) parts - (intparts, fracpart) = (filter numeric intparts', filter numeric fracparts') - groupsizes = reverse $ case map length intparts of - (a:b:cs) | a < b -> b:cs - gs -> gs - mgrps = (`DigitGroups` groupsizes) <$> mseparator - - -- put the parts back together without digit group separators, get the precision and parse the value - let int = concat $ "":intparts - frac = concat $ "":fracpart - precision = length frac - int' = if null int then "0" else int - frac' = if null frac then "0" else frac - quantity = read $ sign++int'++"."++frac' -- this read should never fail - - return $ dbg8 "numberp quantity,precision,mdecimalpoint,mgrps" (quantity,precision,mdecimalpoint,mgrps) - "numberp" - where - numeric = isNumber . headDef '_' - --- test_numberp = do --- let s `is` n = assertParseEqual (parseWithCtx nullctx numberp s) n --- assertFails = assertBool . isLeft . parseWithCtx nullctx numberp --- assertFails "" --- "0" `is` (0, 0, '.', ',', []) --- "1" `is` (1, 0, '.', ',', []) --- "1.1" `is` (1.1, 1, '.', ',', []) --- "1,000.1" `is` (1000.1, 1, '.', ',', [3]) --- "1.00.000,1" `is` (100000.1, 1, ',', '.', [3,2]) --- "1,000,000" `is` (1000000, 0, '.', ',', [3,3]) --- "1." `is` (1, 0, '.', ',', []) --- "1," `is` (1, 0, ',', '.', []) --- ".1" `is` (0.1, 1, '.', ',', []) --- ",1" `is` (0.1, 1, ',', '.', []) --- assertFails "1,000.000,1" --- assertFails "1.000,000.1" --- assertFails "1,000.000.1" --- assertFails "1,,1" --- assertFails "1..1" --- assertFails ".1," --- assertFails ",1." - ---- ** comments - -multilinecommentp :: Monad m => JournalParser m () -multilinecommentp = do - string "comment" >> many spacenonewline >> newline - go - where - go = try (eof <|> (string "end comment" >> newline >> return ())) - <|> (anyLine >> go) - anyLine = anyChar `manyTill` newline - -emptyorcommentlinep :: Monad m => JournalParser m () -emptyorcommentlinep = do - many spacenonewline >> (commentp <|> (many spacenonewline >> newline >> return "")) - return () - --- | Parse a possibly multi-line comment following a semicolon. -followingcommentp :: Monad m => JournalParser m String -followingcommentp = - -- ptrace "followingcommentp" - do samelinecomment <- many spacenonewline >> (try semicoloncommentp <|> (newline >> return "")) - newlinecomments <- many (try (many1 spacenonewline >> semicoloncommentp)) - return $ unlines $ samelinecomment:newlinecomments - --- | Parse a possibly multi-line comment following a semicolon, and --- any tags and/or posting dates within it. Posting dates can be --- expressed with "date"/"date2" tags and/or bracketed dates. The --- dates are parsed in full here so that errors are reported in the --- right position. Missing years can be inferred if a default date is --- provided. --- --- >>> rejp (followingcommentandtagsp (Just $ fromGregorian 2000 1 2)) "; a:b, date:3/4, [=5/6]" --- Right ("a:b, date:3/4, [=5/6]\n",[("a","b"),("date","3/4")],Just 2000-03-04,Just 2000-05-06) --- --- Year unspecified and no default provided -> unknown year error, at correct position: --- >>> rejp (followingcommentandtagsp Nothing) " ; xxx date:3/4\n ; second line" --- Left ...line 1, column 22...year is unknown... --- --- Date tag value contains trailing text - forgot the comma, confused: --- the syntaxes ? We'll accept the leading date anyway --- >>> rejp (followingcommentandtagsp (Just $ fromGregorian 2000 1 2)) "; date:3/4=5/6" --- Right ("date:3/4=5/6\n",[("date","3/4=5/6")],Just 2000-03-04,Nothing) --- -followingcommentandtagsp :: Maybe Day -> ErroringJournalParser (String, [Tag], Maybe Day, Maybe Day) -followingcommentandtagsp mdefdate = do - -- pdbg 0 "followingcommentandtagsp" - - -- Parse a single or multi-line comment, starting on this line or the next one. - -- Save the starting position and preserve all whitespace for the subsequent re-parsing, - -- to get good error positions. - startpos <- getPosition - commentandwhitespace <- do - let semicoloncommentp' = (:) <$> char ';' <*> anyChar `manyTill` eolof - sp1 <- many spacenonewline - l1 <- try semicoloncommentp' <|> (newline >> return "") - ls <- many $ try ((++) <$> many1 spacenonewline <*> semicoloncommentp') - return $ unlines $ (sp1 ++ l1) : ls - let comment = unlines $ map (lstrip . dropWhile (==';') . strip) $ lines commentandwhitespace - -- pdbg 0 $ "commentws:"++show commentandwhitespace - -- pdbg 0 $ "comment:"++show comment - - -- Reparse the comment for any tags. - tags <- case runStringParser (setPosition startpos >> tagsp) commentandwhitespace of - Right ts -> return ts - Left e -> throwError $ show e - -- pdbg 0 $ "tags: "++show tags - - -- Reparse the comment for any posting dates. Use the transaction date for defaults, if provided. - epdates <- liftIO $ rejp (setPosition startpos >> postingdatesp mdefdate) commentandwhitespace - pdates <- case epdates of - Right ds -> return ds - Left e -> throwError e - -- pdbg 0 $ "pdates: "++show pdates - let mdate = headMay $ map snd $ filter ((=="date").fst) pdates - mdate2 = headMay $ map snd $ filter ((=="date2").fst) pdates - - return (comment, tags, mdate, mdate2) - -commentp :: Monad m => JournalParser m String -commentp = commentStartingWithp commentchars - -commentchars :: [Char] -commentchars = "#;*" - -semicoloncommentp :: Monad m => JournalParser m String -semicoloncommentp = commentStartingWithp ";" - -commentStartingWithp :: Monad m => String -> JournalParser m String -commentStartingWithp cs = do - -- ptrace "commentStartingWith" - oneOf cs - many spacenonewline - l <- anyChar `manyTill` eolof - optional newline - return l - ---- ** tags - --- | Extract any tags (name:value ended by comma or newline) embedded in a string. --- --- >>> commentTags "a b:, c:c d:d, e" --- [("b",""),("c","c d:d")] --- --- >>> commentTags "a [1/1/1] [1/1] [1], [=1/1/1] [=1/1] [=1] [1/1=1/1/1] [1=1/1/1] b:c" --- [("b","c")] --- --- --[("date","1/1/1"),("date","1/1"),("date2","1/1/1"),("date2","1/1"),("date","1/1"),("date2","1/1/1"),("date","1"),("date2","1/1/1")] --- --- >>> commentTags "\na b:, \nd:e, f" --- [("b",""),("d","e")] --- -commentTags :: String -> [Tag] -commentTags s = - case runStringParser tagsp s of - Right r -> r - Left _ -> [] -- shouldn't happen - --- | Parse all tags found in a string. -tagsp :: StringParser u Identity [Tag] -tagsp = -- do - -- pdbg 0 $ "tagsp" - many (try (nontagp >> tagp)) - --- | Parse everything up till the first tag. --- --- >>> rsp nontagp "\na b:, \nd:e, f" --- Right "\na " -nontagp :: StringParser u Identity String -nontagp = -- do - -- pdbg 0 "nontagp" - -- anyChar `manyTill` (lookAhead (try (tagorbracketeddatetagsp Nothing >> return ()) <|> eof)) - anyChar `manyTill` lookAhead (try (void tagp) <|> eof) - -- XXX costly ? - --- | Tags begin with a colon-suffixed tag name (a word beginning with --- a letter) and are followed by a tag value (any text up to a comma --- or newline, whitespace-stripped). --- --- >>> rsp tagp "a:b b , c AuxDate: 4/2" --- Right ("a","b b") --- -tagp :: Monad m => StringParser u m Tag -tagp = do - -- pdbg 0 "tagp" - n <- tagnamep - v <- tagvaluep - return (n,v) - --- | --- >>> rsp tagnamep "a:" --- Right "a" -tagnamep :: Monad m => StringParser u m String -tagnamep = -- do - -- pdbg 0 "tagnamep" - many1 (noneOf ": \t\n") <* char ':' - -tagvaluep :: Monad m => StringParser u m String -tagvaluep = do - -- ptrace "tagvalue" - v <- anyChar `manyTill` (void (try (char ',')) <|> eolof) - return $ strip $ reverse $ dropWhile (==',') $ reverse $ strip v - ---- ** posting dates - --- | Parse all posting dates found in a string. Posting dates can be --- expressed with date/date2 tags and/or bracketed dates. The dates --- are parsed fully to give useful errors. Missing years can be --- inferred only if a default date is provided. --- -postingdatesp :: Maybe Day -> ErroringJournalParser [(TagName,Day)] -postingdatesp mdefdate = do - -- pdbg 0 $ "postingdatesp" - let p = ((:[]) <$> datetagp mdefdate) <|> bracketeddatetagsp mdefdate - nonp = - many (notFollowedBy p >> anyChar) - -- anyChar `manyTill` (lookAhead (try (p >> return ()) <|> eof)) - concat <$> many (try (nonp >> p)) - ---- ** date tags - --- | Date tags are tags with name "date" or "date2". Their value is --- parsed as a date, using the provided default date if any for --- inferring a missing year if needed. Any error in date parsing is --- reported and terminates parsing. --- --- >>> rejp (datetagp Nothing) "date: 2000/1/2 " --- Right ("date",2000-01-02) --- --- >>> rejp (datetagp (Just $ fromGregorian 2001 2 3)) "date2:3/4" --- Right ("date2",2001-03-04) --- --- >>> rejp (datetagp Nothing) "date: 3/4" --- Left ...line 1, column 9...year is unknown... --- -datetagp :: Maybe Day -> ErroringJournalParser (TagName,Day) -datetagp mdefdate = do - -- pdbg 0 "datetagp" - string "date" - n <- fromMaybe "" <$> optionMaybe (string "2") - char ':' - startpos <- getPosition - v <- tagvaluep - -- re-parse value as a date. - ctx <- getState - ep <- parseWithCtx - ctx{ctxYear=first3.toGregorian <$> mdefdate} - -- The value extends to a comma, newline, or end of file. - -- It seems like ignoring any extra stuff following a date - -- gives better errors here. - (do - setPosition startpos - datep) -- <* eof) - v - case ep - of Left e -> throwError $ show e - Right d -> return ("date"++n, d) - ---- ** bracketed dates - --- tagorbracketeddatetagsp :: Monad m => Maybe Day -> StringParser u m [Tag] --- tagorbracketeddatetagsp mdefdate = --- bracketeddatetagsp mdefdate <|> ((:[]) <$> tagp) - --- | Parse Ledger-style bracketed posting dates ([DATE=DATE2]), as --- "date" and/or "date2" tags. Anything that looks like an attempt at --- this (a square-bracketed sequence of 0123456789/-.= containing at --- least one digit and one date separator) is also parsed, and will --- throw an appropriate error. --- --- The dates are parsed in full here so that errors are reported in --- the right position. A missing year in DATE can be inferred if a --- default date is provided. A missing year in DATE2 will be inferred --- from DATE. --- --- >>> rejp (bracketeddatetagsp Nothing) "[2016/1/2=3/4]" --- Right [("date",2016-01-02),("date2",2016-03-04)] --- --- >>> rejp (bracketeddatetagsp Nothing) "[1]" --- Left ...not a bracketed date... --- --- >>> rejp (bracketeddatetagsp Nothing) "[2016/1/32]" --- Left ...line 1, column 11...bad date... --- --- >>> rejp (bracketeddatetagsp Nothing) "[1/31]" --- Left ...line 1, column 6...year is unknown... --- --- >>> rejp (bracketeddatetagsp Nothing) "[0123456789/-.=/-.=]" --- Left ...line 1, column 15...bad date, different separators... --- -bracketeddatetagsp :: Maybe Day -> ErroringJournalParser [(TagName, Day)] -bracketeddatetagsp mdefdate = do - -- pdbg 0 "bracketeddatetagsp" - char '[' - startpos <- getPosition - let digits = "0123456789" - s <- many1 (oneOf $ '=':digits++datesepchars) - char ']' - unless (any (`elem` s) digits && any (`elem` datesepchars) s) $ - parserFail "not a bracketed date" - - -- looks sufficiently like a bracketed date, now we - -- re-parse as dates and throw any errors - ctx <- getState - ep <- parseWithCtx - ctx{ctxYear=first3.toGregorian <$> mdefdate} - (do - setPosition startpos - md1 <- optionMaybe datep - maybe (return ()) (setYear.first3.toGregorian) md1 - md2 <- optionMaybe $ char '=' >> datep - eof - return (md1,md2) - ) - s - case ep - of Left e -> throwError $ show e - Right (md1,md2) -> return $ catMaybes - [("date",) <$> md1, ("date2",) <$> md2] - --- * more tests tests_Hledger_Read_JournalReader = TestList $ concat [ diff --git a/hledger-lib/Hledger/Read/TimeclockReader.hs b/hledger-lib/Hledger/Read/TimeclockReader.hs index 501ba4506..17ff43b40 100644 --- a/hledger-lib/Hledger/Read/TimeclockReader.hs +++ b/hledger-lib/Hledger/Read/TimeclockReader.hs @@ -43,6 +43,8 @@ i, o or O. The meanings of the codes are: module Hledger.Read.TimeclockReader ( -- * Reader reader, + -- * Misc other exports + timeclockfilep, -- * Tests tests_Hledger_Read_TimeclockReader ) @@ -59,9 +61,8 @@ import System.FilePath import Hledger.Data -- XXX too much reuse ? -import Hledger.Read.JournalReader ( - directivep, marketpricedirectivep, defaultyeardirectivep, emptyorcommentlinep, datetimep, - parseAndFinaliseJournal, modifiedaccountnamep, genericSourcePos +import Hledger.Read.Common ( + emptyorcommentlinep, datetimep, parseAndFinaliseJournal, modifiedaccountnamep, genericSourcePos ) import Hledger.Utils @@ -93,10 +94,8 @@ timeclockfilep = do items <- many timeclockitemp -- As all ledger line types can be distinguished by the first -- character, excepting transactions versus empty (blank or -- comment-only) lines, can use choice w/o try - timeclockitemp = choice [ directivep - , liftM (return . addMarketPrice) marketpricedirectivep - , defaultyeardirectivep - , emptyorcommentlinep >> return (return id) + timeclockitemp = choice [ + emptyorcommentlinep >> return (return id) , liftM (return . addTimeclockEntry) timeclockentryp ] "timeclock entry, or default year or historical price directive" diff --git a/hledger-lib/Hledger/Read/TimedotReader.hs b/hledger-lib/Hledger/Read/TimedotReader.hs index b9dfd7ce3..ac16a1af5 100644 --- a/hledger-lib/Hledger/Read/TimedotReader.hs +++ b/hledger-lib/Hledger/Read/TimedotReader.hs @@ -24,6 +24,8 @@ inc.client1 .... .... .. module Hledger.Read.TimedotReader ( -- * Reader reader, + -- * Misc other exports + timedotfilep, -- * Tests tests_Hledger_Read_TimedotReader ) @@ -40,9 +42,8 @@ import Text.Parsec hiding (parse) import System.FilePath import Hledger.Data --- XXX too much reuse ? -import Hledger.Read.JournalReader ( - datep, numberp, defaultyeardirectivep, emptyorcommentlinep, followingcommentp, +import Hledger.Read.Common ( + datep, numberp, emptyorcommentlinep, followingcommentp, parseAndFinaliseJournal, modifiedaccountnamep, genericSourcePos ) import Hledger.Utils hiding (ptrace) @@ -77,7 +78,6 @@ timedotfilep = do items <- many timedotfileitemp timedotfileitemp = do ptrace "timedotfileitemp" choice [ - defaultyeardirectivep, emptyorcommentlinep >> return (return id), liftM (return . addTransactions) timedotdayp ] "timedot day entry, or default year or comment line or blank line" diff --git a/hledger-lib/Hledger/Read/Util.hs b/hledger-lib/Hledger/Read/Util.hs deleted file mode 100644 index a9a1ee077..000000000 --- a/hledger-lib/Hledger/Read/Util.hs +++ /dev/null @@ -1,209 +0,0 @@ -{-# LANGUAGE ScopedTypeVariables #-} -module Hledger.Read.Util -where -import Control.Monad.Except -import Data.Maybe --- -import qualified Control.Exception as C --- import Control.Monad.Except -import Data.List --- import Data.Maybe -import System.Directory (doesFileExist, getHomeDirectory) -import System.Environment (getEnv) -import System.Exit (exitFailure) -import System.FilePath (()) -import System.IO (IOMode(..), openFile, stdin, stderr, hSetNewlineMode, universalNewlineMode) -import Test.HUnit -import Text.Printf - -import Hledger.Data.Dates (getCurrentDay) -import Hledger.Data.Journal () -- Show instance -import Hledger.Data.Types -import Hledger.Read.JournalReader as JournalReader -import Hledger.Read.TimedotReader as TimedotReader -import Hledger.Read.TimeclockReader as TimeclockReader -import Hledger.Read.CsvReader as CsvReader -import Hledger.Utils -import Prelude hiding (getContents, writeFile) -import Hledger.Utils.UTF8IOCompat (hGetContents, writeFile) - - -journalEnvVar = "LEDGER_FILE" -journalEnvVar2 = "LEDGER" -journalDefaultFilename = ".hledger.journal" - --- The available data file readers, each one handling a particular data --- format. The first is also used as the default for unknown formats. -readers :: [Reader] -readers = [ - JournalReader.reader - ,TimeclockReader.reader - ,TimedotReader.reader - ,CsvReader.reader - ] - -readFormatNames :: [StorageFormat] -readFormatNames = map rFormat readers - --- | Which readers are worth trying for this (possibly unspecified) format, filepath, and data ? -readersFor :: (Maybe StorageFormat, Maybe FilePath, String) -> [Reader] -readersFor (format,path,s) = - dbg1 ("possible readers for "++show (format,path,elideRight 30 s)) $ - case format of - Just f -> case readerForStorageFormat f of Just r -> [r] - Nothing -> [] - Nothing -> case path of Nothing -> readers - Just p -> case readersForPathAndData (p,s) of [] -> readers - rs -> rs - --- | Find the (first) reader which can handle the given format, if any. -readerForStorageFormat :: StorageFormat -> Maybe Reader -readerForStorageFormat s | null rs = Nothing - | otherwise = Just $ head rs - where - rs = filter ((s==).rFormat) readers :: [Reader] - --- | Find the readers which think they can handle the given file path and data, if any. -readersForPathAndData :: (FilePath,String) -> [Reader] -readersForPathAndData (f,s) = filter (\r -> (rDetector r) f s) readers - --- try each reader in turn, returning the error of the first if all fail -tryReaders :: [Reader] -> Maybe FilePath -> Bool -> Maybe FilePath -> String -> IO (Either String Journal) -tryReaders readers mrulesfile assrt path s = firstSuccessOrBestError [] readers - where - firstSuccessOrBestError :: [String] -> [Reader] -> IO (Either String Journal) - firstSuccessOrBestError [] [] = return $ Left "no readers found" - firstSuccessOrBestError errs (r:rs) = do - dbg1IO "trying reader" (rFormat r) - result <- (runExceptT . (rParser r) mrulesfile assrt path') s - dbg1IO "reader result" $ either id show result - case result of Right j -> return $ Right j -- success! - Left e -> firstSuccessOrBestError (errs++[e]) rs -- keep trying - firstSuccessOrBestError (e:_) [] = return $ Left e -- none left, return first error - path' = fromMaybe "(string)" path - - --- | Read a journal from this string, trying whatever readers seem appropriate: --- --- - if a format is specified, try that reader only --- --- - or if one or more readers recognises the file path and data, try those --- --- - otherwise, try them all. --- --- A CSV conversion rules file may also be specified for use by the CSV reader. --- Also there is a flag specifying whether to check or ignore balance assertions in the journal. -readJournal :: Maybe StorageFormat -> Maybe FilePath -> Bool -> Maybe FilePath -> String -> IO (Either String Journal) -readJournal mformat mrulesfile assrt path s = tryReaders (readersFor (mformat, path, s)) mrulesfile assrt path s - --- | Read a Journal from this file (or stdin if the filename is -) or give --- an error message, using the specified data format or trying all known --- formats. A CSV conversion rules file may be specified for better --- conversion of that format. Also there is a flag specifying whether --- to check or ignore balance assertions in the journal. -readJournalFile :: Maybe StorageFormat -> Maybe FilePath -> Bool -> FilePath -> IO (Either String Journal) -readJournalFile format rulesfile assrt f = readJournalFiles format rulesfile assrt [f] - -readJournalFiles :: Maybe StorageFormat -> Maybe FilePath -> Bool -> [FilePath] -> IO (Either String Journal) -readJournalFiles format rulesfile assrt fs = do - contents <- fmap concat $ mapM readFileAnyNewline fs - readJournal format rulesfile assrt (listToMaybe fs) contents - where - readFileAnyNewline f = do - requireJournalFileExists f - h <- fileHandle f - hSetNewlineMode h universalNewlineMode - hGetContents h - fileHandle "-" = return stdin - fileHandle f = openFile f ReadMode - --- | If the specified journal file does not exist, give a helpful error and quit. -requireJournalFileExists :: FilePath -> IO () -requireJournalFileExists "-" = return () -requireJournalFileExists f = do - exists <- doesFileExist f - when (not exists) $ do - hPrintf stderr "The hledger journal file \"%s\" was not found.\n" f - hPrintf stderr "Please create it first, eg with \"hledger add\" or a text editor.\n" - hPrintf stderr "Or, specify an existing journal file with -f or LEDGER_FILE.\n" - exitFailure - --- | Ensure there is a journal file at the given path, creating an empty one if needed. -ensureJournalFileExists :: FilePath -> IO () -ensureJournalFileExists f = do - exists <- doesFileExist f - when (not exists) $ do - hPrintf stderr "Creating hledger journal file %s.\n" f - -- note Hledger.Utils.UTF8.* do no line ending conversion on windows, - -- we currently require unix line endings on all platforms. - newJournalContent >>= writeFile f - --- | Give the content for a new auto-created journal file. -newJournalContent :: IO String -newJournalContent = do - d <- getCurrentDay - return $ printf "; journal created %s by hledger\n" (show d) - --- | Get the default journal file path specified by the environment. --- Like ledger, we look first for the LEDGER_FILE environment --- variable, and if that does not exist, for the legacy LEDGER --- environment variable. If neither is set, or the value is blank, --- return the hard-coded default, which is @.hledger.journal@ in the --- users's home directory (or in the current directory, if we cannot --- determine a home directory). -defaultJournalPath :: IO String -defaultJournalPath = do - s <- envJournalPath - if null s then defaultJournalPath else return s - where - envJournalPath = - getEnv journalEnvVar - `C.catch` (\(_::C.IOException) -> getEnv journalEnvVar2 - `C.catch` (\(_::C.IOException) -> return "")) - defaultJournalPath = do - home <- getHomeDirectory `C.catch` (\(_::C.IOException) -> return "") - return $ home journalDefaultFilename - --- | Read the default journal file specified by the environment, or raise an error. -defaultJournal :: IO Journal -defaultJournal = defaultJournalPath >>= readJournalFile Nothing Nothing True >>= either error' return - --- | Read a journal from the given string, trying all known formats, or simply throw an error. -readJournal' :: String -> IO Journal -readJournal' s = readJournal Nothing Nothing True Nothing s >>= either error' return - -tests_readJournal' = [ - "readJournal' parses sample journal" ~: do - _ <- samplejournal - assertBool "" True - ] - --- tests - -samplejournal = readJournal' $ unlines - ["2008/01/01 income" - ," assets:bank:checking $1" - ," income:salary" - ,"" - ,"comment" - ,"multi line comment here" - ,"for testing purposes" - ,"end comment" - ,"" - ,"2008/06/01 gift" - ," assets:bank:checking $1" - ," income:gifts" - ,"" - ,"2008/06/02 save" - ," assets:bank:saving $1" - ," assets:bank:checking" - ,"" - ,"2008/06/03 * eat & shop" - ," expenses:food $1" - ," expenses:supplies $1" - ," assets:cash" - ,"" - ,"2008/12/31 * pay off" - ," liabilities:debts $1" - ," assets:bank:checking" - ] diff --git a/hledger-lib/doc/hledger_journal.5.m4.md b/hledger-lib/doc/hledger_journal.5.m4.md index 39c2e36b1..0ead1248e 100644 --- a/hledger-lib/doc/hledger_journal.5.m4.md +++ b/hledger-lib/doc/hledger_journal.5.m4.md @@ -693,11 +693,10 @@ include path/to/file.journal ``` If the path does not begin with a slash, it is relative to the current file. - Glob patterns (`*`) are not currently supported. -The `include` directive may only be used in journal files, and currently -it may only include other journal files (eg, not CSV or timeclock files.) +The `include` directive can only be used in journal files. +It can include journal, timeclock or timedot files, but not CSV files. # EDITOR SUPPORT diff --git a/hledger-lib/doc/hledger_timeclock.5.m4.md b/hledger-lib/doc/hledger_timeclock.5.m4.md index 70026acd8..2b3085ae9 100644 --- a/hledger-lib/doc/hledger_timeclock.5.m4.md +++ b/hledger-lib/doc/hledger_timeclock.5.m4.md @@ -21,7 +21,7 @@ hledger can read timeclock files. these are (a subset of) [timeclock.el](http://www.emacswiki.org/emacs/TimeClock)'s format, containing clock-in and clock-out entries as in the example below. -The date is a [simple date](#simple-dates) (also, [default year directives](#default-year) work). +The date is a [simple date](#simple-dates). The time format is HH:MM[:SS][+-ZZZZ]. Seconds and timezone are optional. The timezone, if present, must be four digits and is ignored (currently the time is always interpreted as a local time). diff --git a/hledger-lib/doc/hledger_timedot.5.m4.md b/hledger-lib/doc/hledger_timedot.5.m4.md index 63ff3ca26..591d93943 100644 --- a/hledger-lib/doc/hledger_timedot.5.m4.md +++ b/hledger-lib/doc/hledger_timedot.5.m4.md @@ -109,8 +109,6 @@ $ hledger -f t.timedot --alias /\\./=: bal date:2016/2/4 4.50 ``` -[default year directives](#default-year) may be used. - Here is a [sample.timedot](https://raw.github.com/simonmichael/hledger/master/data/sample.timedot). diff --git a/hledger-lib/hledger-lib.cabal b/hledger-lib/hledger-lib.cabal index 73c14ba95..7e948c5ab 100644 --- a/hledger-lib/hledger-lib.cabal +++ b/hledger-lib/hledger-lib.cabal @@ -115,11 +115,11 @@ library Hledger.Data.Types Hledger.Query Hledger.Read + Hledger.Read.Common Hledger.Read.CsvReader Hledger.Read.JournalReader Hledger.Read.TimedotReader Hledger.Read.TimeclockReader - Hledger.Read.Util Hledger.Reports Hledger.Reports.ReportOptions Hledger.Reports.BalanceHistoryReport