dev: clarify some confusing comment parsers a bit [#2241]

This commit is contained in:
Simon Michael 2024-09-28 18:53:46 -10:00
parent 45b862f84f
commit b28468e651

View File

@ -99,7 +99,7 @@ module Hledger.Read.Common (
emptyorcommentlinep2, emptyorcommentlinep2,
followingcommentp, followingcommentp,
transactioncommentp, transactioncommentp,
commenttagsp, commentlinetagsp,
postingcommentp, postingcommentp,
-- ** bracketed dates -- ** bracketed dates
@ -1307,25 +1307,44 @@ isSameLineCommentStart :: Char -> Bool
isSameLineCommentStart ';' = True isSameLineCommentStart ';' = True
isSameLineCommentStart _ = False isSameLineCommentStart _ = False
-- A parser for (possibly multiline) comments following a journal item. -- | Parse a comment following a journal item, possibly continued on multiple lines,
-- and return the comment text.
-- --
-- Comments following a journal item begin with a semicolon and extend to -- >>> rtp followingcommentp "" -- no comment
-- the end of the line. They may span multiple lines; any comment lines -- Right ""
-- not on the same line as the journal item must be indented (preceded by -- >>> rtp followingcommentp ";" -- just a (empty) same-line comment. newline is added
-- leading whitespace). -- Right "\n"
-- >>> rtp followingcommentp "; \n"
-- Right "\n"
-- >>> rtp followingcommentp ";\n ;\n" -- a same-line and a next-line comment
-- Right "\n\n"
-- >>> rtp followingcommentp "\n ;\n" -- just a next-line comment. Insert an empty same-line comment so the next-line comment doesn't become a same-line comment.
-- Right "\n\n"
-- --
-- Like Ledger, we sometimes allow data to be embedded in comments. Eg, followingcommentp :: TextParser m Text
-- comments on the account directive and on transactions can contain tags, followingcommentp =
-- and comments on postings can contain tags and/or bracketed posting dates. fst <$> followingcommentpWith (void $ takeWhileP Nothing (/= '\n')) -- XXX support \r\n ?
-- To handle these variations, this parser takes as parameter a subparser,
-- which should consume all input up until the next newline, and which can {-# INLINABLE followingcommentp #-}
-- optionally extract some kind of data from it.
-- followingcommentp' returns this data along with the full text of the comment. -- | Parse a following comment, possibly continued on multiple lines,
-- using the provided line parser to parse each line.
-- This returns the comment text, and the combined results from the line parser.
-- --
-- See followingcommentp for tests. -- Following comments begin with a semicolon and extend to the end of the line.
-- They can optionally be continued on the next lines,
-- where each next line begins with an indent and another semicolon.
-- (This parser expects to see these semicolons and indents.)
-- --
followingcommentp' :: (Monoid a, Show a) => TextParser m a -> TextParser m (Text, a) -- Like Ledger, we sometimes allow data to be embedded in comments.
followingcommentp' contentp = do -- account directive comments and transaction comments can contain tags,
-- and posting comments can contain tags or bracketed posting dates.
-- This helper lets us handle these variations.
-- The line parser should consume all input up until the next newline.
-- See followingcommentp for some tests.
--
followingcommentpWith :: (Monoid a, Show a) => TextParser m a -> TextParser m (Text, a)
followingcommentpWith contentp = do
skipNonNewlineSpaces skipNonNewlineSpaces
-- there can be 0 or 1 sameLine -- there can be 0 or 1 sameLine
sameLine <- try headerp *> ((:[]) <$> match' contentp) <|> pure [] sameLine <- try headerp *> ((:[]) <$> match' contentp) <|> pure []
@ -1346,25 +1365,35 @@ followingcommentp' contentp = do
where where
headerp = char ';' *> skipNonNewlineSpaces headerp = char ';' *> skipNonNewlineSpaces
{-# INLINABLE followingcommentp' #-} {-# INLINABLE followingcommentpWith #-}
-- | Parse the text of a (possibly multiline) comment following a journal item.
-- -- Parse the tags from a single comment line, eg for use with followingcommentpWith.
-- >>> rtp followingcommentp "" -- no comment -- XXX what part of a comment line ? leading whitespace / semicolon or not ?
-- Right "" commentlinetagsp :: TextParser m [Tag]
-- >>> rtp followingcommentp ";" -- just a (empty) same-line comment. newline is added commentlinetagsp = do
-- Right "\n" -- XXX sketchy
-- >>> rtp followingcommentp "; \n" tagName <- (last . T.split isSpace) <$> takeWhileP Nothing (\c -> c /= ':' && c /= '\n')
-- Right "\n" atColon tagName <|> pure [] -- if not ':', then either '\n' or EOF
-- >>> rtp followingcommentp ";\n ;\n" -- a same-line and a next-line comment
-- Right "\n\n" where
-- >>> rtp followingcommentp "\n ;\n" -- just a next-line comment. Insert an empty same-line comment so the next-line comment doesn't become a same-line comment. atColon :: Text -> TextParser m [Tag]
-- Right "\n\n" atColon name = char ':' *> do
-- if T.null name
followingcommentp :: TextParser m Text then commentlinetagsp
followingcommentp = else do
fst <$> followingcommentp' (void $ takeWhileP Nothing (/= '\n')) -- XXX support \r\n ? skipNonNewlineSpaces
{-# INLINABLE followingcommentp #-} val <- tagValue
let tag = (name, val)
(tag:) <$> commentlinetagsp
tagValue :: TextParser m Text
tagValue = do
val <- T.strip <$> takeWhileP Nothing (\c -> c /= ',' && c /= '\n')
_ <- optional $ char ','
pure val
{-# INLINABLE commentlinetagsp #-}
-- | Parse a transaction comment and extract its tags. -- | Parse a transaction comment and extract its tags.
@ -1393,33 +1422,9 @@ followingcommentp =
-- leading and trailing whitespace. -- leading and trailing whitespace.
-- --
transactioncommentp :: TextParser m (Text, [Tag]) transactioncommentp :: TextParser m (Text, [Tag])
transactioncommentp = followingcommentp' commenttagsp transactioncommentp = followingcommentpWith commentlinetagsp
{-# INLINABLE transactioncommentp #-} {-# INLINABLE transactioncommentp #-}
commenttagsp :: TextParser m [Tag]
commenttagsp = do
tagName <- (last . T.split isSpace) <$> takeWhileP Nothing (\c -> c /= ':' && c /= '\n')
atColon tagName <|> pure [] -- if not ':', then either '\n' or EOF
where
atColon :: Text -> TextParser m [Tag]
atColon name = char ':' *> do
if T.null name
then commenttagsp
else do
skipNonNewlineSpaces
val <- tagValue
let tag = (name, val)
(tag:) <$> commenttagsp
tagValue :: TextParser m Text
tagValue = do
val <- T.strip <$> takeWhileP Nothing (\c -> c /= ',' && c /= '\n')
_ <- optional $ char ','
pure val
{-# INLINABLE commenttagsp #-}
-- | Parse a posting comment and extract its tags and dates. -- | Parse a posting comment and extract its tags and dates.
-- --
@ -1473,7 +1478,7 @@ postingcommentp
:: Maybe Year -> TextParser m (Text, [Tag], Maybe Day, Maybe Day) :: Maybe Year -> TextParser m (Text, [Tag], Maybe Day, Maybe Day)
postingcommentp mYear = do postingcommentp mYear = do
(commentText, (tags, dateTags)) <- (commentText, (tags, dateTags)) <-
followingcommentp' (commenttagsanddatesp mYear) followingcommentpWith (commenttagsanddatesp mYear)
let mdate = snd <$> find ((=="date") .fst) dateTags let mdate = snd <$> find ((=="date") .fst) dateTags
mdate2 = snd <$> find ((=="date2").fst) dateTags mdate2 = snd <$> find ((=="date2").fst) dateTags
pure (commentText, tags, mdate, mdate2) pure (commentText, tags, mdate, mdate2)