diff --git a/hledger-lib/Hledger/Data/Dates.hs b/hledger-lib/Hledger/Data/Dates.hs index 0925d12c1..0e2337821 100644 --- a/hledger-lib/Hledger/Data/Dates.hs +++ b/hledger-lib/Hledger/Data/Dates.hs @@ -91,7 +91,6 @@ import Data.Time.Calendar import Data.Time.Calendar.OrdinalDate import Data.Time.Clock import Data.Time.LocalTime -import Data.Void (Void) import Safe (headMay, lastMay, readMay) import Text.Megaparsec import Text.Megaparsec.Char @@ -313,7 +312,7 @@ earliest (Just d1) (Just d2) = Just $ min d1 d2 -- | Parse a period expression to an Interval and overall DateSpan using -- the provided reference date, or return a parse error. -parsePeriodExpr :: Day -> Text -> Either (ParseError Char Void) (Interval, DateSpan) +parsePeriodExpr :: Day -> Text -> Either (ParseError Char CustomErr) (Interval, DateSpan) parsePeriodExpr refdate s = parsewith (periodexpr refdate <* eof) (T.toLower s) maybePeriod :: Day -> Text -> Maybe (Interval,DateSpan) @@ -373,13 +372,13 @@ fixSmartDateStr :: Day -> Text -> String fixSmartDateStr d s = either (\e->error' $ printf "could not parse date %s %s" (show s) (show e)) id - $ (fixSmartDateStrEither d s :: Either (ParseError Char Void) String) + $ (fixSmartDateStrEither d s :: Either (ParseError Char CustomErr) String) -- | A safe version of fixSmartDateStr. -fixSmartDateStrEither :: Day -> Text -> Either (ParseError Char Void) String +fixSmartDateStrEither :: Day -> Text -> Either (ParseError Char CustomErr) String fixSmartDateStrEither d = either Left (Right . showDate) . fixSmartDateStrEither' d -fixSmartDateStrEither' :: Day -> Text -> Either (ParseError Char Void) Day +fixSmartDateStrEither' :: Day -> Text -> Either (ParseError Char CustomErr) Day fixSmartDateStrEither' d s = case parsewith smartdateonly (T.toLower s) of Right sd -> Right $ fixSmartDate d sd Left e -> Left e diff --git a/hledger-lib/Hledger/Data/Types.hs b/hledger-lib/Hledger/Data/Types.hs index b2a05e0d0..a6af98953 100644 --- a/hledger-lib/Hledger/Data/Types.hs +++ b/hledger-lib/Hledger/Data/Types.hs @@ -183,6 +183,7 @@ instance NFData PostingType type TagName = Text type TagValue = Text type Tag = (TagName, TagValue) -- ^ A tag name and (possibly empty) value. +type DateTag = (TagName, Day) -- | The status of a transaction or posting, recorded with a status mark -- (nothing, !, or *). What these mean is ultimately user defined. diff --git a/hledger-lib/Hledger/Read/Common.hs b/hledger-lib/Hledger/Read/Common.hs index 427a1ec54..b680320cd 100644 --- a/hledger-lib/Hledger/Read/Common.hs +++ b/hledger-lib/Hledger/Read/Common.hs @@ -14,6 +14,7 @@ Some of these might belong in Hledger.Read.JournalReader or Hledger.Read. --- * module {-# LANGUAGE CPP, BangPatterns, DeriveDataTypeable, RecordWildCards, NamedFieldPuns, NoMonoLocalBinds, ScopedTypeVariables, FlexibleContexts, TupleSections, OverloadedStrings #-} +{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE PackageImports #-} @@ -28,8 +29,6 @@ module Hledger.Read.Common ( rtp, runJournalParser, rjp, - runErroringJournalParser, - rejp, genericSourcePos, journalSourcePos, generateAutomaticPostings, @@ -49,7 +48,6 @@ module Hledger.Read.Common ( getAccountAliases, clearAccountAliases, journalAddFile, - parserErrorAt, -- * parsers -- ** transaction bits @@ -82,12 +80,10 @@ module Hledger.Read.Common ( -- ** comments multilinecommentp, emptyorcommentlinep, - followingcommentp, - followingcommentandtagsp, - -- ** tags - commentTags, - tagsp, + followingcommentp, + transactioncommentp, + postingcommentp, -- ** bracketed dates bracketeddatetagsp @@ -97,8 +93,9 @@ where import Prelude () import "base-compat-batteries" Prelude.Compat hiding (readFile) import "base-compat-batteries" Control.Monad.Compat -import Control.Monad.Except (ExceptT(..), runExceptT, throwError) --, catchError) +import Control.Monad.Except (ExceptT(..), throwError) import Control.Monad.State.Strict +import Data.Bifunctor (bimap, second) import Data.Char import Data.Data import Data.Decimal (DecimalRaw (Decimal), Decimal) @@ -113,11 +110,11 @@ import Data.Text (Text) import qualified Data.Text as T import Data.Time.Calendar import Data.Time.LocalTime -import Data.Void (Void) import System.Time (getClockTime) import Text.Megaparsec import Text.Megaparsec.Char import Text.Megaparsec.Char.Lexer (decimal) +import Text.Megaparsec.Custom import Hledger.Data import Hledger.Utils @@ -184,21 +181,15 @@ rawOptsToInputOpts rawopts = InputOpts{ --- * parsing utilities -- | Run a string parser with no state in the identity monad. -runTextParser, rtp :: TextParser Identity a -> Text -> Either (ParseError Char Void) a +runTextParser, rtp :: TextParser Identity a -> Text -> Either (ParseError Char CustomErr) a runTextParser p t = runParser p "" t rtp = runTextParser -- | Run a journal parser with a null journal-parsing state. -runJournalParser, rjp :: Monad m => JournalParser m a -> Text -> m (Either (ParseError Char Void) a) +runJournalParser, rjp :: Monad m => JournalParser m a -> Text -> m (Either (ParseError Char CustomErr) a) runJournalParser p t = runParserT (evalStateT p mempty) "" t rjp = runJournalParser --- | Run an error-raising journal parser with a null journal-parsing state. -runErroringJournalParser, rejp :: Monad m => ErroringJournalParser m a -> Text -> m (Either String a) -runErroringJournalParser p t = runExceptT $ - runJournalParser p t >>= either (throwError . parseErrorPretty) return -rejp = runErroringJournalParser - genericSourcePos :: SourcePos -> GenericSourcePos genericSourcePos p = GenericSourcePos (sourceName p) (fromIntegral . unPos $ sourceLine p) (fromIntegral . unPos $ sourceColumn p) @@ -219,19 +210,19 @@ generateAutomaticPostings j = j { jtxns = map modifier $ jtxns j } -- | Given a megaparsec ParsedJournal parser, input options, file -- path and file content: parse and post-process a Journal, or give an error. -parseAndFinaliseJournal :: ErroringJournalParser IO ParsedJournal -> InputOpts +parseAndFinaliseJournal :: JournalParser IO ParsedJournal -> InputOpts -> FilePath -> Text -> ExceptT String IO Journal parseAndFinaliseJournal parser iopts f txt = do t <- liftIO getClockTime y <- liftIO getCurrentYear - ep <- runParserT (evalStateT parser nulljournal {jparsedefaultyear=Just y}) f txt + ep <- liftIO $ runParserT (evalStateT parser nulljournal {jparsedefaultyear=Just y}) f txt case ep of Right pj -> let pj' = if auto_ iopts then generateAutomaticPostings pj else pj in case journalFinalise t f txt (not $ ignore_assertions_ iopts) pj' of Right j -> return j Left e -> throwError e - Left e -> throwError $ parseErrorPretty e + Left e -> throwError $ customParseErrorPretty txt e parseAndFinaliseJournal' :: JournalParser Identity ParsedJournal -> InputOpts -> FilePath -> Text -> ExceptT String IO Journal @@ -319,18 +310,6 @@ journalAddFile f j@Journal{jfiles=fs} = j{jfiles=fs++[f]} -- append, unlike the other fields, even though we do a final reverse, -- to compensate for additional reversal due to including/monoid-concatting --- -- | Terminate parsing entirely, returning the given error message --- -- with the current parse position prepended. --- parserError :: String -> ErroringJournalParser a --- parserError s = do --- pos <- getPosition --- parserErrorAt pos s - --- | Terminate parsing entirely, returning the given error message --- with the given parse position prepended. -parserErrorAt :: Monad m => SourcePos -> String -> ErroringJournalParser m a -parserErrorAt pos s = throwError $ sourcePosPretty pos ++ ":\n" ++ s - --- * parsers --- ** transaction bits @@ -348,7 +327,7 @@ codep = option "" $ try $ do skipSome spacenonewline between (char '(') (char ')') $ takeWhileP Nothing (/= ')') -descriptionp :: JournalParser m Text +descriptionp :: TextParser m Text descriptionp = takeWhileP Nothing (not . semicolonOrNewline) where semicolonOrNewline c = c == ';' || c == '\n' @@ -365,38 +344,47 @@ datep = do datep' :: Maybe Year -> TextParser m Day datep' mYear = do + startPos <- getPosition d1 <- decimal "year or month" sep <- satisfy isDateSepChar "date separator" d2 <- decimal "month or day" - fullDate d1 sep d2 <|> partialDate mYear d1 sep d2 + fullDate startPos d1 sep d2 <|> partialDate startPos mYear d1 sep d2 "full or partial date" where - fullDate :: Integer -> Char -> Int -> TextParser m Day - fullDate year sep1 month = do + fullDate :: SourcePos -> Integer -> Char -> Int -> TextParser m Day + fullDate startPos year sep1 month = do sep2 <- satisfy isDateSepChar "date separator" day <- decimal "day" + endPos <- getPosition let dateStr = show year ++ [sep1] ++ show month ++ [sep2] ++ show day - when (sep1 /= sep2) $ fail $ + when (sep1 /= sep2) $ parseErrorAtRegion startPos endPos $ "invalid date (mixing date separators is not allowed): " ++ dateStr case fromGregorianValid year month day of - Nothing -> fail $ "well-formed but invalid date: " ++ dateStr + Nothing -> parseErrorAtRegion startPos endPos $ + "well-formed but invalid date: " ++ dateStr Just date -> pure $! date - partialDate :: Maybe Year -> Integer -> Char -> Int -> TextParser m Day - partialDate mYear month sep day = case mYear of - Just year -> - case fromGregorianValid year (fromIntegral month) day of - Nothing -> fail $ "well-formed but invalid date: " ++ dateStr - Just date -> pure $! date - where dateStr = show year ++ [sep] ++ show month ++ [sep] ++ show day + partialDate + :: SourcePos -> Maybe Year -> Integer -> Char -> Int -> TextParser m Day + partialDate startPos mYear month sep day = do + endPos <- getPosition + case mYear of + Just year -> + case fromGregorianValid year (fromIntegral month) day of + Nothing -> parseErrorAtRegion startPos endPos $ + "well-formed but invalid date: " ++ dateStr + Just date -> pure $! date + where dateStr = show year ++ [sep] ++ show month ++ [sep] ++ show day - Nothing -> fail $ - "partial date "++dateStr++" found, but the current year is unknown" - where dateStr = show month ++ [sep] ++ show day + Nothing -> parseErrorAtRegion startPos endPos $ + "partial date "++dateStr++" found, but the current year is unknown" + where dateStr = show month ++ [sep] ++ show day + +{-# INLINABLE datep' #-} -- | Parse a date and time in YYYY/MM/DD HH:MM[:SS][+-ZZZZ] format. -- Hyphen (-) and period (.) are also allowed as date separators. @@ -469,7 +457,7 @@ accountnamep = do -- | Parse whitespace then an amount, with an optional left or right -- currency symbol and optional price, or return the special -- "missing" marker amount. -spaceandamountormissingp :: Monad m => JournalParser m MixedAmount +spaceandamountormissingp :: JournalParser m MixedAmount spaceandamountormissingp = option missingmixedamt $ try $ do lift $ skipSome spacenonewline @@ -492,13 +480,13 @@ test_spaceandamountormissingp = do -- | Parse a single-commodity amount, with optional symbol on the left or -- right, optional unit or total price, and optional (ignored) -- ledger-style balance assertion or fixed lot price declaration. -amountp :: Monad m => JournalParser m Amount +amountp :: JournalParser m Amount amountp = do amount <- amountwithoutpricep price <- priceamountp pure $ amount { aprice = price } -amountwithoutpricep :: Monad m => JournalParser m Amount +amountwithoutpricep :: JournalParser m Amount amountwithoutpricep = try leftsymbolamountp <|> try rightsymbolamountp <|> nosymbolamountp @@ -545,7 +533,7 @@ skipMany' p = go False then go True else pure isNull -leftsymbolamountp :: Monad m => JournalParser m Amount +leftsymbolamountp :: JournalParser m Amount leftsymbolamountp = do sign <- lift signp m <- lift multiplierp @@ -557,7 +545,7 @@ leftsymbolamountp = do return $ Amount c (sign q) NoPrice s m "left-symbol amount" -rightsymbolamountp :: Monad m => JournalParser m Amount +rightsymbolamountp :: JournalParser m Amount rightsymbolamountp = do m <- lift multiplierp sign <- lift signp @@ -576,7 +564,7 @@ rightsymbolamountp = do return $ Amount c (sign q) NoPrice s m "right-symbol amount" -nosymbolamountp :: Monad m => JournalParser m Amount +nosymbolamountp :: JournalParser m Amount nosymbolamountp = do m <- lift multiplierp suggestedStyle <- getDefaultAmountStyle @@ -601,7 +589,7 @@ quotedcommoditysymbolp = simplecommoditysymbolp :: TextParser m CommoditySymbol simplecommoditysymbolp = takeWhile1P Nothing (not . isNonsimpleCommodityChar) -priceamountp :: Monad m => JournalParser m Price +priceamountp :: JournalParser m Price priceamountp = option NoPrice $ try $ do lift (skipMany spacenonewline) char '@' @@ -612,7 +600,7 @@ priceamountp = option NoPrice $ try $ do pure $ priceConstructor priceAmount -partialbalanceassertionp :: Monad m => JournalParser m BalanceAssertion +partialbalanceassertionp :: JournalParser m BalanceAssertion partialbalanceassertionp = optional $ try $ do lift (skipMany spacenonewline) sourcepos <- genericSourcePos <$> lift getPosition @@ -632,7 +620,7 @@ partialbalanceassertionp = optional $ try $ do -- <|> return Nothing -- http://ledger-cli.org/3.0/doc/ledger3.html#Fixing-Lot-Prices -fixedlotpricep :: Monad m => JournalParser m (Maybe Amount) +fixedlotpricep :: JournalParser m (Maybe Amount) fixedlotpricep = optional $ try $ do lift (skipMany spacenonewline) char '{' @@ -885,211 +873,235 @@ data AmbiguousNumber = AmbiguousNumber DigitGrp Char DigitGrp -- 1,000 multilinecommentp :: TextParser m () multilinecommentp = startComment *> anyLine `skipManyTill` endComment where - startComment = string "comment" >> skipLine - endComment = eof <|> string "end comment" *> skipLine + startComment = string "comment" *> trailingSpaces + endComment = eof <|> string "end comment" *> trailingSpaces - skipLine = void $ skipMany spacenonewline *> newline - anyLine = takeWhileP Nothing (\c -> c /= '\n') *> newline + trailingSpaces = skipMany spacenonewline <* newline + anyLine = void $ takeWhileP Nothing (\c -> c /= '\n') *> newline + +{-# INLINABLE multilinecommentp #-} emptyorcommentlinep :: TextParser m () emptyorcommentlinep = do skipMany spacenonewline - void linecommentp <|> void newline + skiplinecommentp <|> void newline + where + -- A line (file-level) comment can start with a semicolon, hash, or star + -- (allowing org nodes). + skiplinecommentp :: TextParser m () + skiplinecommentp = do + satisfy $ \c -> c == ';' || c == '#' || c == '*' + void $ takeWhileP Nothing (\c -> c /= '\n') + optional newline + pure () --- | Parse a possibly multi-line comment following a semicolon. -followingcommentp :: TextParser m Text -followingcommentp = T.unlines . map snd <$> followingcommentlinesp +{-# INLINABLE emptyorcommentlinep #-} -followingcommentlinesp :: TextParser m [(SourcePos, Text)] -followingcommentlinesp = do - skipMany spacenonewline - - samelineComment@(_, samelineCommentText) - <- try commentp <|> (,) <$> (getPosition <* eolof) <*> pure "" - newlineComments <- many $ try $ do - skipSome spacenonewline -- leading whitespace is required - commentp - - if T.null samelineCommentText && null newlineComments - then pure [] - else pure $ samelineComment : newlineComments - --- | Parse a possibly multi-line comment following a semicolon, and --- any tags and/or posting dates within it. Posting dates can be --- expressed with "date"/"date2" tags and/or bracketed dates. The --- dates are parsed in full here so that errors are reported in the --- right position. Missing years can be inferred if a default date is --- provided. +-- A parser combinator for parsing (possibly multiline) comments +-- following journal items. -- --- >>> rejp (followingcommentandtagsp (Just $ fromGregorian 2000 1 2)) "; a:b, date:3/4, [=5/6]" +-- Several journal items may be followed by comments, which begin with +-- semicolons and extend to the end of the line. Such comments may span +-- multiple lines, but comment lines below the journal item must be +-- preceeded by leading whitespace. +-- +-- This parser combinator accepts a parser that consumes all input up +-- until the next newline. This parser should extract the "content" from +-- comments. The resulting parser returns this content plus the raw text +-- of the comment itself. +followingcommentp' :: (Monoid a) => TextParser m a -> TextParser m (Text, a) +followingcommentp' contentp = do + skipMany spacenonewline + sameLine <- try headerp *> match' contentp <|> pure ("", mempty) + _ <- eolof + lowerLines <- many $ + try (skipSome spacenonewline *> headerp) *> match' contentp <* eolof + + let (textLines, results) = unzip $ sameLine : lowerLines + strippedCommentText = T.unlines $ map T.strip textLines + result = mconcat results + pure (strippedCommentText, result) + + where + headerp = char ';' *> skipMany spacenonewline + +{-# INLINABLE followingcommentp' #-} + +-- | Parse the text of a (possibly multiline) comment following a journal +-- item. +followingcommentp :: TextParser m Text +followingcommentp = + fst <$> followingcommentp' (void $ takeWhileP Nothing (/= '\n')) +{-# INLINABLE followingcommentp #-} + + +-- | Parse a transaction comment and extract its tags. +-- +-- The first line of a transaction may be followed by comments, which +-- begin with semicolons and extend to the end of the line. Transaction +-- comments may span multiple lines, but comment lines below the +-- transaction must be preceeded by leading whitespace. +-- +-- 2000/1/1 ; a transaction comment starting on the same line ... +-- ; extending to the next line +-- account1 $1 +-- account2 +-- +-- Tags are name-value pairs. +-- +-- >>> let getTags (_,tags) = tags +-- >>> let parseTags = fmap getTags . rtp transactioncommentp +-- +-- >>> parseTags "; name1: val1, name2:all this is value2" +-- Right [("name1","val1"),("name2","all this is value2")] +-- +-- A tag's name must be immediately followed by a colon, without +-- separating whitespace. The corresponding value consists of all the text +-- following the colon up until the next colon or newline, stripped of +-- leading and trailing whitespace. +-- +transactioncommentp :: TextParser m (Text, [Tag]) +transactioncommentp = followingcommentp' commenttagsp +{-# INLINABLE transactioncommentp #-} + +commenttagsp :: TextParser m [Tag] +commenttagsp = do + tagName <- fmap (last . T.split isSpace) + $ takeWhileP Nothing (\c -> c /= ':' && c /= '\n') + atColon tagName <|> pure [] -- if not ':', then either '\n' or EOF + + where + atColon :: Text -> TextParser m [Tag] + atColon name = char ':' *> do + if T.null name + then commenttagsp + else do + skipMany spacenonewline + val <- tagValue + let tag = (name, val) + (tag:) <$> commenttagsp + + tagValue :: TextParser m Text + tagValue = do + val <- T.strip <$> takeWhileP Nothing (\c -> c /= ',' && c /= '\n') + _ <- optional $ char ',' + pure val + +{-# INLINABLE commenttagsp #-} + + +-- | Parse a posting comment and extract its tags and dates. +-- +-- Postings may be followed by comments, which begin with semicolons and +-- extend to the end of the line. Posting comments may span multiple +-- lines, but comment lines below the posting must be preceeded by +-- leading whitespace. +-- +-- 2000/1/1 +-- account1 $1 ; a posting comment starting on the same line ... +-- ; extending to the next line +-- +-- account2 +-- ; a posting comment beginning on the next line +-- +-- Tags are name-value pairs. +-- +-- >>> let getTags (_,tags,_,_) = tags +-- >>> let parseTags = fmap getTags . rtp (postingcommentp Nothing) +-- +-- >>> parseTags "; name1: val1, name2:all this is value2" +-- Right [("name1","val1"),("name2","all this is value2")] +-- +-- A tag's name must be immediately followed by a colon, without +-- separating whitespace. The corresponding value consists of all the text +-- following the colon up until the next colon or newline, stripped of +-- leading and trailing whitespace. +-- +-- Posting dates may be expressed with "date"/"date2" tags or with +-- bracketed date syntax. Posting dates will inherit their year from the +-- transaction date if the year is not specified. We throw parse errors on +-- invalid dates. +-- +-- >>> let getDates (_,_,d1,d2) = (d1, d2) +-- >>> let parseDates = fmap getDates . rtp (postingcommentp (Just 2000)) +-- +-- >>> parseDates "; date: 1/2, date2: 1999/12/31" +-- Right (Just 2000-01-02,Just 1999-12-31) +-- >>> parseDates "; [1/2=1999/12/31]" +-- Right (Just 2000-01-02,Just 1999-12-31) +-- +-- Example: tags, date tags, and bracketed dates +-- >>> rtp (postingcommentp (Just 2000)) "; a:b, date:3/4, [=5/6]" -- Right ("a:b, date:3/4, [=5/6]\n",[("a","b"),("date","3/4")],Just 2000-03-04,Just 2000-05-06) -- --- Year unspecified and no default provided -> unknown year error, at correct position: --- >>> rejp (followingcommentandtagsp Nothing) " ; xxx date:3/4\n ; second line" --- Left ...1:22...partial date 3/4 found, but the current year is unknown... --- --- Date tag value contains trailing text - forgot the comma, confused: --- the syntaxes ? We'll accept the leading date anyway --- >>> rejp (followingcommentandtagsp (Just $ fromGregorian 2000 1 2)) "; date:3/4=5/6" +-- Example: extraction of dates from date tags ignores trailing text +-- >>> rtp (postingcommentp (Just 2000)) "; date:3/4=5/6" -- Right ("date:3/4=5/6\n",[("date","3/4=5/6")],Just 2000-03-04,Nothing) -- -followingcommentandtagsp - :: Monad m - => Maybe Day - -> ErroringJournalParser m (Text, [Tag], Maybe Day, Maybe Day) -followingcommentandtagsp mdefdate = do - -- pdbg 0 "followingcommentandtagsp" +postingcommentp + :: Maybe Year -> TextParser m (Text, [Tag], Maybe Day, Maybe Day) +postingcommentp mYear = do + (commentText, (tags, dateTags)) <- + followingcommentp' (commenttagsanddatesp mYear) + let mdate = fmap snd $ find ((=="date") .fst) dateTags + mdate2 = fmap snd $ find ((=="date2").fst) dateTags + pure (commentText, tags, mdate, mdate2) +{-# INLINABLE postingcommentp #-} - commentLines <- lift followingcommentlinesp - -- pdbg 0 $ "commentws:" ++ show commentLines - -- Reparse the comment for any tags. - tagsWithPositions <- case - traverse (runTextParserAt tagswithvaluepositions) commentLines of - Right tss -> pure $ concat tss - Left e -> throwError $ parseErrorPretty e - - -- Extract date-tag style posting dates from the tags. - -- Use the transaction date for defaults, if provided. - let isDateLabel txt = txt == "date" || txt == "date2" - isDateTag = isDateLabel . fst . snd - tagDates <- case traverse tagDate $ filter isDateTag tagsWithPositions of - Right ds -> pure ds - Left e -> throwError $ parseErrorPretty e - - -- Reparse the comment for any bracketed style posting dates. - -- Use the transaction date for defaults, if provided. - bracketedDates <- case - traverse (runTextParserAt (bracketedpostingdatesp mdefdate)) - commentLines of - Right dss -> pure $ concat dss - Left e -> throwError $ parseErrorPretty e - - let pdates = tagDates ++ bracketedDates - mdate = fmap snd $ find ((=="date") .fst) pdates - mdate2 = fmap snd $ find ((=="date2").fst) pdates - -- pdbg 0 $ "allDates: "++show pdates - - let strippedComment = T.unlines $ map (T.strip . snd) commentLines - tags = map snd tagsWithPositions - -- pdbg 0 $ "comment:"++show strippedComment - - pure (strippedComment, tags, mdate, mdate2) +commenttagsanddatesp + :: Maybe Year -> TextParser m ([Tag], [DateTag]) +commenttagsanddatesp mYear = do + (txt, dateTags) <- match $ readUpTo ':' + -- next char is either ':' or '\n' (or EOF) + let tagName = last (T.split isSpace txt) + (fmap.second) (dateTags++) (atColon tagName) <|> pure ([], dateTags) -- if not ':', then either '\n' or EOF where - runTextParserAt parser (pos, txt) = - runTextParser (setPosition pos *> parser) txt + readUpTo :: Char -> TextParser m [DateTag] + readUpTo end = do + void $ takeWhileP Nothing (\c -> c /= end && c /= '\n' && c /= '[') + -- if not '[' then ':' or '\n' or EOF + atBracket (readUpTo end) <|> pure [] - tagDate :: (SourcePos, Tag) - -> Either (ParseError Char Void) (TagName, Day) - tagDate (pos, (name, value)) = - fmap (name,) $ runTextParserAt (datep' myear) (pos, value) - where myear = fmap (first3 . toGregorian) mdefdate + atBracket :: TextParser m [DateTag] -> TextParser m [DateTag] + atBracket cont = do + -- Uses the fact that bracketed date-tags cannot contain newlines + dateTags <- option [] $ lookAhead (bracketeddatetagsp mYear) + _ <- char '[' + dateTags' <- cont + pure $ dateTags ++ dateTags' --- A transaction/posting comment must start with a semicolon. This parser --- discards the leading whitespace of the comment and returns the source --- position of the comment's first non-whitespace character. -commentp :: TextParser m (SourcePos, Text) -commentp = commentStartingWithp (==';') + atColon :: Text -> TextParser m ([Tag], [DateTag]) + atColon name = char ':' *> do + skipMany spacenonewline + (tags, dateTags) <- case name of + "" -> pure ([], []) + "date" -> dateValue name + "date2" -> dateValue name + _ -> tagValue name + _ <- optional $ char ',' + bimap (tags++) (dateTags++) <$> commenttagsanddatesp mYear --- A line (file-level) comment can start with a semicolon, hash, or star --- (allowing org nodes). This parser discards the leading whitespace of --- the comment and returns the source position of the comment's first --- non-whitespace character. -linecommentp :: TextParser m (SourcePos, Text) -linecommentp = - commentStartingWithp $ \c -> c == ';' || c == '#' || c == '*' + dateValue :: Text -> TextParser m ([Tag], [DateTag]) + dateValue name = do + (txt, (date, dateTags)) <- match' $ do + date <- datep' mYear + dateTags <- readUpTo ',' + pure (date, dateTags) + let val = T.strip txt + pure $ ( [(name, val)] + , (name, date) : dateTags ) -commentStartingWithp :: (Char -> Bool) -> TextParser m (SourcePos, Text) -commentStartingWithp f = do - -- ptrace "commentStartingWith" - satisfy f - skipMany spacenonewline - startPos <- getPosition - content <- takeWhileP Nothing (\c -> c /= '\n') - optional newline - return (startPos, content) + tagValue :: Text -> TextParser m ([Tag], [DateTag]) + tagValue name = do + (txt, dateTags) <- match' $ readUpTo ',' + let val = T.strip txt + pure $ ( [(name, val)] + , dateTags ) ---- ** tags +{-# INLINABLE commenttagsanddatesp #-} --- | Extract any tags (name:value ended by comma or newline) embedded in a string. --- --- >>> commentTags "a b:, c:c d:d, e" --- [("b",""),("c","c d:d")] --- --- >>> commentTags "a [1/1/1] [1/1] [1], [=1/1/1] [=1/1] [=1] [1/1=1/1/1] [1=1/1/1] b:c" --- [("b","c")] --- --- --[("date","1/1/1"),("date","1/1"),("date2","1/1/1"),("date2","1/1"),("date","1/1"),("date2","1/1/1"),("date","1"),("date2","1/1/1")] --- --- >>> commentTags "\na b:, \nd:e, f" --- [("b",""),("d","e")] --- --- >>> commentTags ":value" --- [] --- -commentTags :: Text -> [Tag] -commentTags s = either (const []) id $ runTextParser tagsp s - --- | Parse all tags found in a string. -tagsp :: SimpleTextParser [Tag] -tagsp = map snd <$> tagswithvaluepositions - -tagswithvaluepositions :: SimpleTextParser [(SourcePos, Tag)] -tagswithvaluepositions = do - -- pdbg 0 $ "tagsp" - - -- If we parse in a single pass, we cannot know whether some text - -- belongs to a tag label until we have reached a colon (in which case - -- it does) or whitespace (in which case it does not). Therefore, we - -- hold on to the text until we reach such a break point, and then - -- decide what to do. - - potentialTagName <- tillNextBreak - atSpaceChar <|> atColon potentialTagName <|> atEof - - where - - isBreak :: Char -> Bool - isBreak c = isSpace c || c == ':' - - tillNextBreak :: SimpleTextParser Text - tillNextBreak = takeWhileP Nothing (not . isBreak) - - tagValue :: SimpleTextParser Text - tagValue = T.strip <$> takeWhileP Nothing (not . commaOrNewline) - where commaOrNewline c = c == ',' || c == '\n' - - atSpaceChar :: SimpleTextParser [(SourcePos, Tag)] - atSpaceChar = skipSome spaceChar *> tagswithvaluepositions - - atColon :: Text -> SimpleTextParser [(SourcePos, Tag)] - atColon tagName = do - char ':' - if T.null tagName - then tagswithvaluepositions - else do - pos <- getPosition - tagVal <- tagValue - let tag = (pos, (tagName, tagVal)) - tags <- tagswithvaluepositions - pure $ tag : tags - - atEof :: SimpleTextParser [(SourcePos, Tag)] - atEof = eof *> pure [] - ---- ** posting dates - --- | Parse all bracketed posting dates found in a string. The dates are --- parsed fully to give useful errors. Missing years can be inferred only --- if a default date is provided. --- -bracketedpostingdatesp :: Maybe Day -> SimpleTextParser [(TagName,Day)] -bracketedpostingdatesp mdefdate = do - -- pdbg 0 $ "bracketedpostingdatesp" - skipMany $ notChar '[' - concat <$> sepEndBy (bracketeddatetagsp mdefdate <|> char '[' *> pure []) - (skipMany $ notChar '[') --- ** bracketed dates @@ -1119,8 +1131,9 @@ bracketedpostingdatesp mdefdate = do -- >>> either (Left . parseErrorPretty) Right $ rtp (bracketeddatetagsp Nothing) "[0123456789/-.=/-.=]" -- Left ...1:13:...expecting month or day... -- -bracketeddatetagsp :: Maybe Day -> SimpleTextParser [(TagName, Day)] -bracketeddatetagsp mdefdate = do +bracketeddatetagsp + :: Maybe Year -> TextParser m [(TagName, Day)] +bracketeddatetagsp mYear1 = do -- pdbg 0 "bracketeddatetagsp" try $ do s <- lookAhead @@ -1131,14 +1144,24 @@ bracketeddatetagsp mdefdate = do -- Looks sufficiently like a bracketed date to commit to parsing a date between (char '[') (char ']') $ do - let myear1 = fmap readYear mdefdate - md1 <- optional $ datep' myear1 + md1 <- optional $ datep' mYear1 - let myear2 = fmap readYear md1 <|> myear1 - md2 <- optional $ char '=' *> datep' myear2 + let mYear2 = fmap readYear md1 <|> mYear1 + md2 <- optional $ char '=' *> datep' mYear2 pure $ catMaybes [("date",) <$> md1, ("date2",) <$> md2] where readYear = first3 . toGregorian isBracketedDateChar c = isDigit c || isDateSepChar c || c == '=' + +{-# INLINABLE bracketeddatetagsp #-} + + +--- ** helper parsers + +-- A version of `match` that is strict in the returned text +match' :: TextParser m a -> TextParser m (Text, a) +match' p = do + (!txt, p) <- match p + pure (txt, p) diff --git a/hledger-lib/Hledger/Read/CsvReader.hs b/hledger-lib/Hledger/Read/CsvReader.hs index 8d301f585..a17c066ff 100644 --- a/hledger-lib/Hledger/Read/CsvReader.hs +++ b/hledger-lib/Hledger/Read/CsvReader.hs @@ -45,7 +45,6 @@ import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.IO as T import Data.Time.Calendar (Day) -import Data.Void (Void) #if MIN_VERSION_time(1,5,0) import Data.Time.Format (parseTimeM, defaultTimeLocale) #else @@ -404,7 +403,7 @@ parseAndValidateCsvRules rulesfile s = do makeParseError f s = FancyError (fromList [initialPos f]) (S.singleton $ ErrorFail s) -- | Parse this text as CSV conversion rules. The file path is for error messages. -parseCsvRules :: FilePath -> T.Text -> Either (ParseError Char Void) CsvRules +parseCsvRules :: FilePath -> T.Text -> Either (ParseError Char CustomErr) CsvRules -- parseCsvRules rulesfile s = runParser csvrulesfile nullrules{baseAccount=takeBaseName rulesfile} rulesfile s parseCsvRules rulesfile s = runParser (evalStateT rulesp rules) rulesfile s diff --git a/hledger-lib/Hledger/Read/JournalReader.hs b/hledger-lib/Hledger/Read/JournalReader.hs index a357761b7..9e4be5425 100644 --- a/hledger-lib/Hledger/Read/JournalReader.hs +++ b/hledger-lib/Hledger/Read/JournalReader.hs @@ -42,8 +42,6 @@ module Hledger.Read.JournalReader ( parseAndFinaliseJournal, runJournalParser, rjp, - runErroringJournalParser, - rejp, -- * Parsers used elsewhere getParentAccount, @@ -75,7 +73,7 @@ import Prelude () import "base-compat-batteries" Prelude.Compat hiding (readFile) import qualified Control.Exception as C import Control.Monad -import Control.Monad.Except (ExceptT(..), runExceptT, throwError) +import Control.Monad.Except (ExceptT(..)) import Control.Monad.State.Strict import qualified Data.Map.Strict as M import Data.Text (Text) @@ -84,7 +82,6 @@ import Data.List import qualified Data.Text as T import Data.Time.Calendar import Data.Time.LocalTime -import Data.Void (Void) import Safe import Test.HUnit #ifdef TESTS @@ -93,6 +90,7 @@ import Text.Megaparsec.Error #endif import Text.Megaparsec hiding (parse) import Text.Megaparsec.Char +import Text.Megaparsec.Custom import Text.Printf import System.FilePath @@ -136,10 +134,10 @@ aliasesFromOpts = map (\a -> fromparse $ runParser accountaliasp ("--alias "++qu -- | A journal parser. Accumulates and returns a "ParsedJournal", -- which should be finalised/validated before use. -- --- >>> rejp (journalp <* eof) "2015/1/1\n a 0\n" +-- >>> rjp (journalp <* eof) "2015/1/1\n a 0\n" -- Right Journal with 1 transactions, 1 accounts -- -journalp :: MonadIO m => ErroringJournalParser m ParsedJournal +journalp :: MonadIO m => JournalParser m ParsedJournal journalp = do many addJournalItemP eof @@ -147,7 +145,7 @@ journalp = do -- | A side-effecting parser; parses any kind of journal item -- and updates the parse state accordingly. -addJournalItemP :: MonadIO m => ErroringJournalParser m () +addJournalItemP :: MonadIO m => JournalParser m () addJournalItemP = -- all journal line types can be distinguished by the first -- character, can use choice without backtracking @@ -166,7 +164,7 @@ addJournalItemP = -- | Parse any journal directive and update the parse state accordingly. -- Cf http://hledger.org/manual.html#directives, -- http://ledger-cli.org/3.0/doc/ledger3.html#Command-Directives -directivep :: MonadIO m => ErroringJournalParser m () +directivep :: MonadIO m => JournalParser m () directivep = (do optional $ char '!' choice [ @@ -186,40 +184,44 @@ directivep = (do ] ) "directive" -includedirectivep :: MonadIO m => ErroringJournalParser m () +includedirectivep :: MonadIO m => JournalParser m () includedirectivep = do string "include" lift (skipSome spacenonewline) - filename <- lift restofline - parentpos <- getPosition - parentj <- get + filename <- T.unpack <$> takeWhileP Nothing (/= '\n') -- don't consume newline yet + + -- save parent state + parentParserState <- getParserState + parentj <- get + let childj = newJournalWithParseStateFrom parentj - (ej :: Either String ParsedJournal) <- - liftIO $ runExceptT $ do - let curdir = takeDirectory (sourceName parentpos) - filepath <- expandPath curdir filename `orRethrowIOError` (show parentpos ++ " locating " ++ filename) - txt <- readFilePortably filepath `orRethrowIOError` (show parentpos ++ " reading " ++ filepath) - (ej1::Either (ParseError Char Void) ParsedJournal) <- - runParserT - (evalStateT - (choiceInState - [journalp - ,timeclockfilep - ,timedotfilep - -- can't include a csv file yet, that reader is special - ]) - childj) - filepath txt - either - (throwError - . ((show parentpos ++ " in included file " ++ show filename ++ ":\n") ++) - . parseErrorPretty) - (return . journalAddFile (filepath, txt)) - ej1 - case ej of - Left e -> throwError e - Right childj -> modify' (\parentj -> childj <> parentj) - -- discard child's parse info, prepend its (reversed) list data, combine other fields + parentpos <- getPosition + + -- read child input + let curdir = takeDirectory (sourceName parentpos) + filepath <- lift $ expandPath curdir filename `orRethrowIOError` (show parentpos ++ " locating " ++ filename) + childInput <- lift $ readFilePortably filepath `orRethrowIOError` (show parentpos ++ " reading " ++ filepath) + + -- set child state + setInput childInput + pushPosition $ initialPos filepath + put childj + + -- parse include file + let parsers = [ journalp + , timeclockfilep + , timedotfilep + ] -- can't include a csv file yet, that reader is special + updatedChildj <- journalAddFile (filepath, childInput) <$> + region (withSource childInput) (choiceInState parsers) + + -- restore parent state, prepending the child's parse info + setParserState parentParserState + put $ updatedChildj <> parentj + -- discard child's parse info, prepend its (reversed) list data, combine other fields + + void newline + newJournalWithParseStateFrom :: Journal -> Journal newJournalWithParseStateFrom j = mempty{ @@ -234,11 +236,12 @@ newJournalWithParseStateFrom j = mempty{ -- | Lift an IO action into the exception monad, rethrowing any IO -- error with the given message prepended. -orRethrowIOError :: IO a -> String -> ExceptT String IO a -orRethrowIOError io msg = - ExceptT $ - (Right <$> io) - `C.catch` \(e::C.IOException) -> return $ Left $ printf "%s:\n%s" msg (show e) +orRethrowIOError :: MonadIO m => IO a -> String -> TextParser m a +orRethrowIOError io msg = do + eResult <- liftIO $ (Right <$> io) `C.catch` \(e::C.IOException) -> pure $ Left $ printf "%s:\n%s" msg (show e) + case eResult of + Right res -> pure res + Left errMsg -> fail errMsg accountdirectivep :: JournalParser m () accountdirectivep = do @@ -248,12 +251,7 @@ accountdirectivep = do macode' :: Maybe String <- (optional $ lift $ skipSome spacenonewline >> some digitChar) let macode :: Maybe AccountCode = read <$> macode' newline - _tags <- many $ do - startpos <- getPosition - l <- indentedlinep - case runTextParser (setPosition startpos >> tagsp) $ T.pack l of - Right ts -> return ts - Left _e -> return [] -- TODO throwError $ parseErrorPretty e + skipMany indentedlinep modify' (\j -> j{jaccounts = (acct, macode) : jaccounts j}) @@ -262,28 +260,30 @@ indentedlinep = lift (skipSome spacenonewline) >> (rstrip <$> lift restofline) -- | Parse a one-line or multi-line commodity directive. -- --- >>> Right _ <- rejp commoditydirectivep "commodity $1.00" --- >>> Right _ <- rejp commoditydirectivep "commodity $\n format $1.00" --- >>> Right _ <- rejp commoditydirectivep "commodity $\n\n" -- a commodity with no format --- >>> Right _ <- rejp commoditydirectivep "commodity $1.00\n format $1.00" -- both, what happens ? -commoditydirectivep :: Monad m => ErroringJournalParser m () -commoditydirectivep = try commoditydirectiveonelinep <|> commoditydirectivemultilinep +-- >>> Right _ <- rjp commoditydirectivep "commodity $1.00" +-- >>> Right _ <- rjp commoditydirectivep "commodity $\n format $1.00" +-- >>> Right _ <- rjp commoditydirectivep "commodity $\n\n" -- a commodity with no format +-- >>> Right _ <- rjp commoditydirectivep "commodity $1.00\n format $1.00" -- both, what happens ? +commoditydirectivep :: JournalParser m () +commoditydirectivep = commoditydirectiveonelinep <|> commoditydirectivemultilinep -- | Parse a one-line commodity directive. -- --- >>> Right _ <- rejp commoditydirectiveonelinep "commodity $1.00" --- >>> Right _ <- rejp commoditydirectiveonelinep "commodity $1.00 ; blah\n" -commoditydirectiveonelinep :: Monad m => ErroringJournalParser m () +-- >>> Right _ <- rjp commoditydirectiveonelinep "commodity $1.00" +-- >>> Right _ <- rjp commoditydirectiveonelinep "commodity $1.00 ; blah\n" +commoditydirectiveonelinep :: JournalParser m () commoditydirectiveonelinep = do - string "commodity" - lift (skipSome spacenonewline) - pos <- getPosition - Amount{acommodity,astyle} <- amountp + (pos, Amount{acommodity,astyle}) <- try $ do + string "commodity" + lift (skipSome spacenonewline) + pos <- getPosition + amount <- amountp + pure $ (pos, amount) lift (skipMany spacenonewline) _ <- lift followingcommentp let comm = Commodity{csymbol=acommodity, cformat=Just $ dbg2 "style from commodity directive" astyle} if asdecimalpoint astyle == Nothing - then parserErrorAt pos pleaseincludedecimalpoint + then parseErrorAt pos pleaseincludedecimalpoint else modify' (\j -> j{jcommodities=M.insert acommodity comm $ jcommodities j}) pleaseincludedecimalpoint :: String @@ -291,8 +291,8 @@ pleaseincludedecimalpoint = "to avoid ambiguity, please include a decimal point -- | Parse a multi-line commodity directive, containing 0 or more format subdirectives. -- --- >>> Right _ <- rejp commoditydirectivemultilinep "commodity $ ; blah \n format $1.00 ; blah" -commoditydirectivemultilinep :: Monad m => ErroringJournalParser m () +-- >>> Right _ <- rjp commoditydirectivemultilinep "commodity $ ; blah \n format $1.00 ; blah" +commoditydirectivemultilinep :: JournalParser m () commoditydirectivemultilinep = do string "commodity" lift (skipSome spacenonewline) @@ -306,7 +306,7 @@ commoditydirectivemultilinep = do -- | Parse a format (sub)directive, throwing a parse error if its -- symbol does not match the one given. -formatdirectivep :: Monad m => CommoditySymbol -> ErroringJournalParser m AmountStyle +formatdirectivep :: CommoditySymbol -> JournalParser m AmountStyle formatdirectivep expectedsym = do string "format" lift (skipSome spacenonewline) @@ -316,9 +316,9 @@ formatdirectivep expectedsym = do if acommodity==expectedsym then if asdecimalpoint astyle == Nothing - then parserErrorAt pos pleaseincludedecimalpoint + then parseErrorAt pos pleaseincludedecimalpoint else return $ dbg2 "style from format subdirective" astyle - else parserErrorAt pos $ + else parseErrorAt pos $ printf "commodity directive symbol \"%s\" and format directive symbol \"%s\" should be the same" expectedsym acommodity keywordp :: String -> JournalParser m () @@ -403,7 +403,7 @@ defaultyeardirectivep = do failIfInvalidYear y setYear y' -defaultcommoditydirectivep :: Monad m => ErroringJournalParser m () +defaultcommoditydirectivep :: JournalParser m () defaultcommoditydirectivep = do char 'D' "default commodity" lift (skipSome spacenonewline) @@ -411,10 +411,10 @@ defaultcommoditydirectivep = do Amount{acommodity,astyle} <- amountp lift restofline if asdecimalpoint astyle == Nothing - then parserErrorAt pos pleaseincludedecimalpoint + then parseErrorAt pos pleaseincludedecimalpoint else setDefaultCommodityAndStyle (acommodity, astyle) -marketpricedirectivep :: Monad m => JournalParser m MarketPrice +marketpricedirectivep :: JournalParser m MarketPrice marketpricedirectivep = do char 'P' "market price" lift (skipMany spacenonewline) @@ -434,7 +434,7 @@ ignoredpricecommoditydirectivep = do lift restofline return () -commodityconversiondirectivep :: Monad m => JournalParser m () +commodityconversiondirectivep :: JournalParser m () commodityconversiondirectivep = do char 'C' "commodity conversion" lift (skipSome spacenonewline) @@ -448,7 +448,7 @@ commodityconversiondirectivep = do --- ** transactions -modifiertransactionp :: MonadIO m => ErroringJournalParser m ModifierTransaction +modifiertransactionp :: JournalParser m ModifierTransaction modifiertransactionp = do char '=' "modifier transaction" lift (skipMany spacenonewline) @@ -457,17 +457,17 @@ modifiertransactionp = do return $ ModifierTransaction valueexpr postings -- | Parse a periodic transaction -periodictransactionp :: MonadIO m => ErroringJournalParser m PeriodicTransaction +periodictransactionp :: JournalParser m PeriodicTransaction periodictransactionp = do char '~' "periodic transaction" lift (skipMany spacenonewline) - periodexpr <- T.strip <$> descriptionp + periodexpr <- lift $ T.strip <$> descriptionp _ <- lift followingcommentp postings <- postingsp Nothing return $ PeriodicTransaction periodexpr postings -- | Parse a (possibly unbalanced) transaction. -transactionp :: MonadIO m => ErroringJournalParser m Transaction +transactionp :: JournalParser m Transaction transactionp = do -- ptrace "transactionp" pos <- getPosition @@ -476,10 +476,10 @@ transactionp = do lookAhead (lift spacenonewline <|> newline) "whitespace or newline" status <- lift statusp "cleared status" code <- lift codep "transaction code" - description <- T.strip <$> descriptionp - comment <- lift followingcommentp - let tags = commentTags comment - postings <- postingsp (Just date) + description <- lift $ T.strip <$> descriptionp + (comment, tags) <- lift transactioncommentp + let year = first3 $ toGregorian date + postings <- postingsp (Just year) pos' <- getPosition let sourcepos = journalSourcePos pos pos' return $ txnTieKnot $ Transaction 0 sourcepos date edate status code description comment tags postings "" @@ -581,18 +581,18 @@ test_transactionp = do -- Parse the following whitespace-beginning lines as postings, posting -- tags, and/or comments (inferring year, if needed, from the given date). -postingsp :: MonadIO m => Maybe Day -> ErroringJournalParser m [Posting] -postingsp mdate = many (postingp mdate) "postings" +postingsp :: Maybe Year -> JournalParser m [Posting] +postingsp mTransactionYear = many (postingp mTransactionYear) "postings" --- linebeginningwithspaces :: Monad m => JournalParser m String +-- linebeginningwithspaces :: JournalParser m String -- linebeginningwithspaces = do -- sp <- lift (skipSome spacenonewline) -- c <- nonspace -- cs <- lift restofline -- return $ sp ++ (c:cs) ++ "\n" -postingp :: MonadIO m => Maybe Day -> ErroringJournalParser m Posting -postingp mtdate = do +postingp :: Maybe Year -> JournalParser m Posting +postingp mTransactionYear = do -- pdbg 0 "postingp" (status, account) <- try $ do lift (skipSome spacenonewline) @@ -605,7 +605,7 @@ postingp mtdate = do massertion <- partialbalanceassertionp _ <- fixedlotpricep lift (skipMany spacenonewline) - (comment,tags,mdate,mdate2) <- followingcommentandtagsp mtdate + (comment,tags,mdate,mdate2) <- lift $ postingcommentp mTransactionYear return posting { pdate=mdate , pdate2=mdate2 diff --git a/hledger-lib/Hledger/Read/TimeclockReader.hs b/hledger-lib/Hledger/Read/TimeclockReader.hs index 05b259c0b..0971fe03b 100644 --- a/hledger-lib/Hledger/Read/TimeclockReader.hs +++ b/hledger-lib/Hledger/Read/TimeclockReader.hs @@ -83,7 +83,7 @@ reader = Reader parse :: InputOpts -> FilePath -> Text -> ExceptT String IO Journal parse = parseAndFinaliseJournal timeclockfilep -timeclockfilep :: ErroringJournalParser IO ParsedJournal +timeclockfilep :: MonadIO m => JournalParser m ParsedJournal timeclockfilep = do many timeclockitemp eof j@Journal{jparsetimeclockentries=es} <- get diff --git a/hledger-lib/Hledger/Utils/Parse.hs b/hledger-lib/Hledger/Utils/Parse.hs index 62a9fed94..a60a3d44c 100644 --- a/hledger-lib/Hledger/Utils/Parse.hs +++ b/hledger-lib/Hledger/Utils/Parse.hs @@ -1,34 +1,56 @@ {-# LANGUAGE CPP, TypeFamilies #-} -module Hledger.Utils.Parse where -import Control.Monad.Except +module Hledger.Utils.Parse ( + SimpleStringParser, + SimpleTextParser, + TextParser, + JournalParser, + + choice', + choiceInState, + surroundedBy, + parsewith, + parsewithString, + parseWithState, + parseWithState', + fromparse, + parseerror, + showDateParseError, + nonspace, + isNonNewlineSpace, + spacenonewline, + restofline, + eolof, + + -- * re-exports + CustomErr +) +where + import Control.Monad.State.Strict (StateT, evalStateT) import Data.Char import Data.Functor.Identity (Identity(..)) import Data.List import Data.Text (Text) -import Data.Void (Void) import Text.Megaparsec import Text.Megaparsec.Char +import Text.Megaparsec.Custom import Text.Printf import Hledger.Data.Types import Hledger.Utils.UTF8IOCompat (error') -- | A parser of string to some type. -type SimpleStringParser a = Parsec Void String a +type SimpleStringParser a = Parsec CustomErr String a -- | A parser of strict text to some type. -type SimpleTextParser = Parsec Void Text -- XXX an "a" argument breaks the CsvRulesParser declaration somehow +type SimpleTextParser = Parsec CustomErr Text -- XXX an "a" argument breaks the CsvRulesParser declaration somehow -- | A parser of text in some monad. -type TextParser m a = ParsecT Void Text m a +type TextParser m a = ParsecT CustomErr Text m a -- | A parser of text in some monad, with a journal as state. -type JournalParser m a = StateT Journal (ParsecT Void Text m) a - --- | A parser of text in some monad, with a journal as state, that can throw an error string mid-parse. -type ErroringJournalParser m a = StateT Journal (ParsecT Void Text (ExceptT String m)) a +type JournalParser m a = StateT Journal (ParsecT CustomErr Text m) a -- | Backtracking choice, use this when alternatives share a prefix. -- Consumes no input if all choices fail. @@ -37,7 +59,7 @@ choice' = choice . map try -- | Backtracking choice, use this when alternatives share a prefix. -- Consumes no input if all choices fail. -choiceInState :: [StateT s (ParsecT Void Text m) a] -> StateT s (ParsecT Void Text m) a +choiceInState :: [StateT s (ParsecT CustomErr Text m) a] -> StateT s (ParsecT CustomErr Text m) a choiceInState = choice . map try surroundedBy :: Applicative m => m openclose -> m a -> m a @@ -49,7 +71,7 @@ parsewith p = runParser p "" parsewithString :: Parsec e String a -> String -> Either (ParseError Char e) a parsewithString p = runParser p "" -parseWithState :: Monad m => st -> StateT st (ParsecT Void Text m) a -> Text -> m (Either (ParseError Char Void) a) +parseWithState :: Monad m => st -> StateT st (ParsecT CustomErr Text m) a -> Text -> m (Either (ParseError Char CustomErr) a) parseWithState ctx p s = runParserT (evalStateT p ctx) "" s parseWithState' @@ -78,7 +100,7 @@ nonspace = satisfy (not . isSpace) isNonNewlineSpace :: Char -> Bool isNonNewlineSpace c = c /= '\n' && isSpace c -spacenonewline :: (Stream s, Char ~ Token s) => ParsecT Void s m Char +spacenonewline :: (Stream s, Char ~ Token s) => ParsecT CustomErr s m Char spacenonewline = satisfy isNonNewlineSpace restofline :: TextParser m String diff --git a/hledger-lib/Text/Megaparsec/Custom.hs b/hledger-lib/Text/Megaparsec/Custom.hs new file mode 100644 index 000000000..5dce6f785 --- /dev/null +++ b/hledger-lib/Text/Megaparsec/Custom.hs @@ -0,0 +1,248 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE PackageImports #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE StandaloneDeriving #-} + +module Text.Megaparsec.Custom ( + -- * Custom parse error type + CustomErr, + + -- * Throwing custom parse errors + parseErrorAt, + parseErrorAtRegion, + withSource, + + -- * Pretty-printing custom parse errors + customParseErrorPretty +) +where + +import Prelude () +import "base-compat-batteries" Prelude.Compat hiding (readFile) + +import Data.Foldable (asum, toList) +import qualified Data.List.NonEmpty as NE +import Data.Proxy (Proxy (Proxy)) +import qualified Data.Set as S +import Data.Text (Text) +import Data.Void (Void) +import Text.Megaparsec + + +--- * Custom parse error type + +-- | A custom error type for the parser. The type is specialized to +-- parsers of 'Text' streams. + +data CustomErr + -- | Fail with a message at a specific source position interval. The + -- interval must be contained within a single line. + = ErrorFailAt SourcePos -- Starting position + Pos -- Ending position (column; same line as start) + String -- Error message + -- | Attach a source file to a parse error (for error reporting from + -- include files, e.g. with the 'region' parser combinator) + | ErrorWithSource Text -- Source file contents + (ParseError Char CustomErr) -- The original + deriving (Show, Eq, Ord) + +-- We require an 'Ord' instance for 'CustomError' so that they may be +-- stored in a 'Set'. The actual instance is inconsequential, so we just +-- derive it, but this requires an (orphan) instance for 'ParseError'. +-- Hopefully this does not cause any trouble. + +deriving instance (Ord c, Ord e) => Ord (ParseError c e) + +instance ShowErrorComponent CustomErr where + showErrorComponent (ErrorFailAt _ _ errMsg) = errMsg + showErrorComponent (ErrorWithSource _ e) = parseErrorTextPretty e + + +--- * Throwing custom parse errors + +-- | Fail at a specific source position. + +parseErrorAt :: MonadParsec CustomErr s m => SourcePos -> String -> m a +parseErrorAt pos msg = customFailure (ErrorFailAt pos (sourceColumn pos) msg) +{-# INLINABLE parseErrorAt #-} + +-- | Fail at a specific source interval (within a single line). The +-- interval is inclusive on the left and exclusive on the right; that is, +-- it spans from the start position to just before (and not including) the +-- end position. + +parseErrorAtRegion + :: MonadParsec CustomErr s m + => SourcePos -- ^ Start position + -> SourcePos -- ^ End position + -> String -- ^ Error message + -> m a +parseErrorAtRegion startPos endPos msg = + let startCol = sourceColumn startPos + endCol' = mkPos $ subtract 1 $ unPos $ sourceColumn endPos + endCol = if startCol <= endCol' + && sourceLine startPos == sourceLine endPos + then endCol' else startCol + in customFailure (ErrorFailAt startPos endCol msg) +{-# INLINABLE parseErrorAtRegion #-} + +-- | Attach a source file to a parse error. Intended for use with the +-- 'region' parser combinator. + +withSource :: Text -> ParseError Char CustomErr -> ParseError Char CustomErr +withSource s e = + FancyError (errorPos e) $ S.singleton $ ErrorCustom $ ErrorWithSource s e + + +--- * Pretty-printing custom parse errors + +-- | Pretty-print our custom parse errors and display the line on which +-- the parse error occured. Use this instead of 'parseErrorPretty'. +-- +-- If any custom errors are present, arbitrarily take the first one (since +-- only one custom error should be used at a time). + +customParseErrorPretty :: Text -> ParseError Char CustomErr -> String +customParseErrorPretty source err = case findCustomError err of + Nothing -> customParseErrorPretty' source err pos1 + + Just (ErrorWithSource customSource customErr) -> + customParseErrorPretty customSource customErr + + Just (ErrorFailAt sourcePos col errMsg) -> + let newPositionStack = sourcePos NE.:| NE.tail (errorPos err) + errorIntervalLength = mkPos $ max 1 $ + unPos col - unPos (sourceColumn sourcePos) + 1 + + newErr :: ParseError Char Void + newErr = FancyError newPositionStack (S.singleton (ErrorFail errMsg)) + + in customParseErrorPretty' source newErr errorIntervalLength + + where + findCustomError :: ParseError Char CustomErr -> Maybe CustomErr + findCustomError err = case err of + FancyError _ errSet -> + finds (\case {ErrorCustom e -> Just e; _ -> Nothing}) errSet + _ -> Nothing + + finds :: (Foldable t) => (a -> Maybe b) -> t a -> Maybe b + finds f = asum . map f . toList + + +--- * Modified Megaparsec source + +-- The below code has been copied from Megaparsec (v.6.4.1, +-- Text.Megaparsec.Error) and modified to suit our needs. These changes are +-- indicated by square brackets. The following copyright notice, conditions, +-- and disclaimer apply to all code below this point. +-- +-- Copyright © 2015–2018 Megaparsec contributors
+-- Copyright © 2007 Paolo Martini
+-- Copyright © 1999–2000 Daan Leijen +-- +-- All rights reserved. +-- +-- Redistribution and use in source and binary forms, with or without +-- modification, are permitted provided that the following conditions are met: +-- +-- * Redistributions of source code must retain the above copyright notice, +-- this list of conditions and the following disclaimer. +-- +-- * Redistributions in binary form must reproduce the above copyright notice, +-- this list of conditions and the following disclaimer in the documentation +-- and/or other materials provided with the distribution. +-- +-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS +-- OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +-- OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN +-- NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY DIRECT, INDIRECT, +-- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +-- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, +-- OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +-- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +-- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +-- EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +-- | Pretty-print a 'ParseError Char CustomErr' and display the line on +-- which the parse error occurred. The rendered 'String' always ends with +-- a newline. + +customParseErrorPretty' + :: ( ShowToken (Token s) + , LineToken (Token s) + , ShowErrorComponent e + , Stream s ) + => s -- ^ Original input stream + -> ParseError (Token s) e -- ^ Parse error to render + -> Pos -- ^ Length of error interval [added] + -> String -- ^ Result of rendering +customParseErrorPretty' = customParseErrorPretty_ defaultTabWidth + + +customParseErrorPretty_ + :: forall s e. + ( ShowToken (Token s) + , LineToken (Token s) + , ShowErrorComponent e + , Stream s ) + => Pos -- ^ Tab width + -> s -- ^ Original input stream + -> ParseError (Token s) e -- ^ Parse error to render + -> Pos -- ^ Length of error interval [added] + -> String -- ^ Result of rendering +customParseErrorPretty_ w s e l = + sourcePosStackPretty (errorPos e) <> ":\n" <> + padding <> "|\n" <> + lineNumber <> " | " <> rline <> "\n" <> + padding <> "| " <> rpadding <> highlight <> "\n" <> -- [added `highlight`] + parseErrorTextPretty e + where + epos = NE.head (errorPos e) -- [changed from NE.last to NE.head] + lineNumber = (show . unPos . sourceLine) epos + padding = replicate (length lineNumber + 1) ' ' + rpadding = replicate (unPos (sourceColumn epos) - 1) ' ' + highlight = replicate (unPos l) '^' -- [added] + rline = + case rline' of + [] -> "" + xs -> expandTab w xs + rline' = fmap tokenAsChar . chunkToTokens (Proxy :: Proxy s) $ + selectLine (sourceLine epos) s + +-- | Select a line from input stream given its number. + +selectLine + :: forall s. (LineToken (Token s), Stream s) + => Pos -- ^ Number of line to select + -> s -- ^ Input stream + -> Tokens s -- ^ Selected line +selectLine l = go pos1 + where + go !n !s = + if n == l + then fst (takeWhile_ notNewline s) + else go (n <> pos1) (stripNewline $ snd (takeWhile_ notNewline s)) + notNewline = not . tokenIsNewline + stripNewline s = + case take1_ s of + Nothing -> s + Just (_, s') -> s' + +-- | Replace tab characters with given number of spaces. + +expandTab + :: Pos + -> String + -> String +expandTab w' = go 0 + where + go 0 [] = [] + go 0 ('\t':xs) = go w xs + go 0 (x:xs) = x : go 0 xs + go !n xs = ' ' : go (n - 1) xs + w = unPos w' + diff --git a/hledger-lib/hledger-lib.cabal b/hledger-lib/hledger-lib.cabal index 6ccd34c9c..b5c36be9c 100644 --- a/hledger-lib/hledger-lib.cabal +++ b/hledger-lib/hledger-lib.cabal @@ -2,7 +2,7 @@ -- -- see: https://github.com/sol/hpack -- --- hash: 4e9f93f0ca43f594b381f1e1e03e67ce3379bd4830b260e6f7dc1596b946993f +-- hash: b808d840bfd7de5e860adb6ac41ec6bcee061cebcff87b4a1b87d2a46c58b0bf name: hledger-lib version: 1.9.99 @@ -95,6 +95,7 @@ library Hledger.Utils.UTF8IOCompat Text.Tabular.AsciiWide other-modules: + Text.Megaparsec.Custom Paths_hledger_lib hs-source-dirs: ./. @@ -187,6 +188,7 @@ test-suite doctests Hledger.Utils.Text Hledger.Utils.Tree Hledger.Utils.UTF8IOCompat + Text.Megaparsec.Custom Text.Tabular.AsciiWide Paths_hledger_lib hs-source-dirs: @@ -283,6 +285,7 @@ test-suite easytests Hledger.Utils.Text Hledger.Utils.Tree Hledger.Utils.UTF8IOCompat + Text.Megaparsec.Custom Text.Tabular.AsciiWide Paths_hledger_lib hs-source-dirs: @@ -379,6 +382,7 @@ test-suite hunittests Hledger.Utils.Text Hledger.Utils.Tree Hledger.Utils.UTF8IOCompat + Text.Megaparsec.Custom Text.Tabular.AsciiWide Paths_hledger_lib hs-source-dirs: diff --git a/hledger/Hledger/Cli/Commands/Rewrite.hs b/hledger/Hledger/Cli/Commands/Rewrite.hs index 529d38449..09d1d53c8 100755 --- a/hledger/Hledger/Cli/Commands/Rewrite.hs +++ b/hledger/Hledger/Cli/Commands/Rewrite.hs @@ -175,8 +175,8 @@ rewrite opts@CliOpts{rawopts_=rawopts,reportopts_=ropts} j@Journal{jtxns=ts} = d outputFromOpts rawopts opts{reportopts_=ropts{query_=""}} j j' postingp' :: T.Text -> IO Posting -postingp' t = runErroringJournalParser (postingp Nothing <* eof) t' >>= \case - Left err -> fail err +postingp' t = runJournalParser (postingp Nothing <* eof) t' >>= \case + Left err -> fail $ parseErrorPretty' t' err Right p -> return p where t' = " " <> t <> "\n" -- inject space and newline for proper parsing diff --git a/tests/journal/parse-errors.test b/tests/journal/parse-errors.test index ad814f2d6..32093fff8 100644 --- a/tests/journal/parse-errors.test +++ b/tests/journal/parse-errors.test @@ -8,6 +8,9 @@ $ hledger -f - print >2 hledger: -:1:5: + | +1 | 2018 + | ^ unexpected newline expecting date separator or the rest of year or month diff --git a/tests/journal/posting-dates.test b/tests/journal/posting-dates.test index 1a5ae084f..7485942f3 100644 --- a/tests/journal/posting-dates.test +++ b/tests/journal/posting-dates.test @@ -23,7 +23,7 @@ end comment b 0 ; date: 3.32 ->>>2 /10:19/ +>>>2 /10:16/ >>>=1 # 3. Ledger's bracketed date syntax is also supported: `[DATE]`,