dev: comment parsing tests/refactoring, fix indented timedot comments

This commit is contained in:
Simon Michael 2024-06-24 11:57:51 +01:00
parent 713c3f4067
commit 969b5a89d1
5 changed files with 56 additions and 45 deletions

View File

@ -96,7 +96,7 @@ module Hledger.Read.Common (
isSameLineCommentStart,
multilinecommentp,
emptyorcommentlinep,
emptyorcommentlinep',
emptyorcommentlinep2,
followingcommentp,
transactioncommentp,
commenttagsp,
@ -407,6 +407,10 @@ setYear y = modify' (\j -> j{jparsedefaultyear=Just y})
getYear :: JournalParser m (Maybe Year)
getYear = fmap jparsedefaultyear get
dp :: String -> TextParser m ()
dp = const $ return () -- no-op
-- dp = dbgparse 1 -- trace parse state at this --debug level
-- | Get the decimal mark that has been specified for parsing, if any
-- (eg by the CSV decimal-mark rule, or possibly a future journal directive).
-- Return it as an AmountStyle that amount parsers can use.
@ -1260,9 +1264,10 @@ multilinecommentp = startComment *> anyLine `skipManyTill` endComment
-- | A blank or comment line in journal format: a line that's empty or
-- containing only whitespace or whose first non-whitespace character
-- is semicolon, hash, or star.
-- is semicolon, hash, or star. See also emptyorcommentlinep2.
emptyorcommentlinep :: TextParser m ()
emptyorcommentlinep = do
dp "emptyorcommentlinep"
skipNonNewlineSpaces
skiplinecommentp <|> void newline
where
@ -1275,25 +1280,18 @@ emptyorcommentlinep = do
{-# INLINABLE emptyorcommentlinep #-}
dp :: String -> TextParser m ()
dp = const $ return () -- no-op
-- dp = dbgparse 1 -- trace parse state at this --debug level
-- | A new comment line parser (from TimedotReader).
-- Parse empty lines, all-blank lines, and lines beginning with any of
-- the provided comment-beginning characters.
emptyorcommentlinep' :: [Char] -> TextParser m ()
emptyorcommentlinep' cs = do
dp "emptyorcommentlinep'"
label ("empty line or comment line beginning with "++cs) $
void commentp <|> void (try $ skipNonNewlineSpaces >> newline)
where
commentp = do
choice (map (some.char) cs)
void $ takeWhileP Nothing (/='\n')
void $ optional newline
{-# INLINABLE emptyorcommentlinep' #-}
-- | A newer comment line parser.
-- Parses a line which is empty, all blanks, or whose first non-blank character is one of those provided.
emptyorcommentlinep2 :: [Char] -> TextParser m ()
emptyorcommentlinep2 cs =
label ("empty line or comment line beginning with "++cs) $ do
dp "emptyorcommentlinep2"
skipNonNewlineSpaces
void newline <|> void commentp
where
commentp = do
choice (map (some.char) cs)
takeWhileP Nothing (/='\n') <* newline
-- | Is this a character that, as the first non-whitespace on a line,
-- starts a comment line ?

View File

@ -50,7 +50,7 @@ import Text.Megaparsec hiding (parse)
import Text.Megaparsec.Char
import Hledger.Data
import Hledger.Read.Common hiding (emptyorcommentlinep)
import Hledger.Read.Common
import Hledger.Utils
import Data.Decimal (roundTo)
import Data.Functor ((<&>))
@ -112,7 +112,7 @@ timedotp = preamblep >> many dayp >> eof >> get
preamblep :: JournalParser m ()
preamblep = do
dp "preamblep"
void $ many $ notFollowedBy datelinep >> (lift $ emptyorcommentlinep "#;*")
void $ many $ notFollowedBy datelinep >> (lift $ emptyorcommentlinep2 "#;*")
-- | Parse timedot day entries to multi-posting time transactions for that day.
-- @
@ -156,7 +156,7 @@ datelinep = do
commentlinesp :: JournalParser m ()
commentlinesp = do
dp "commentlinesp"
void $ many $ try $ lift $ emptyorcommentlinep "#;"
void $ many $ try $ lift $ emptyorcommentlinep2 "#;"
-- orgnondatelinep :: JournalParser m ()
-- orgnondatelinep = do
@ -274,18 +274,3 @@ letterquantitiesp =
| t@(c:_) <- group $ sort $ letter1:letters
]
return groups
-- | XXX new comment line parser, move to Hledger.Read.Common.emptyorcommentlinep
-- Parse empty lines, all-blank lines, and lines beginning with any of the provided
-- comment-beginning characters.
emptyorcommentlinep :: [Char] -> TextParser m ()
emptyorcommentlinep cs =
label ("empty line or comment line beginning with "++cs) $ do
dbgparse tracelevel "emptyorcommentlinep"
skipNonNewlineSpaces
void newline <|> void commentp
where
commentp = do
choice (map (some.char) cs)
takeWhileP Nothing (/='\n') <* newline

View File

@ -98,8 +98,8 @@ dp = const $ return () -- no-op
whitespacep, commentlinesp, restoflinep :: TextParser Identity ()
whitespacep = void $ {- dp "whitespacep" >> -} many spacenonewline
commentlinesp = void $ {- dp "commentlinesp" >> -} many (emptyorcommentlinep' "#")
restoflinep = void $ {- dp "restoflinep" >> -} whitespacep >> emptyorcommentlinep' "#"
commentlinesp = void $ {- dp "commentlinesp" >> -} many (emptyorcommentlinep2 "#")
restoflinep = void $ {- dp "restoflinep" >> -} whitespacep >> emptyorcommentlinep2 "#"
confp :: TextParser Identity [ConfSection] -- a monadic TextParser to allow reusing other hledger parsers
confp = do

View File

@ -31,7 +31,7 @@ $ hledger -f - print
>= 0
# ** 2. transaction comments must use ;
# ** 2. Same-line transaction comments start only with ;. The description can contain # or *.
<
2017/1/1 this # and * are not ; the comment
$ hledger -f - print
@ -39,16 +39,33 @@ $ hledger -f - print
>= 0
# ** 3. posting comments must use ;
# ** 3. Same-line posting comments start only with ;.
<
2017/1/1
a 0 # hash & star not allowed for posting comments
a 0 # posting comments can't start with # or *
$ hledger -f - print
> //
>2 //
>= 1
# ** 4. register does not show comments
# ** 4. Postings can be commented by a ; or a non-indented # (indented # or * does not start a comment).
# Account names can contain # or (not at start) *.
<
2024-01-01
#a 1 ; posting to #a account
b* -1 ; posting to b* account
*c 0 ; posting to c account, with * status mark
;d 0 ; a comment line attached to the c posting above, part of the transcction
; e 0 ; top level comment line, not part of the transaction
# f 0 ; top level comment line, not part of the transaction
$ hledger -f - accounts
#a
b*
c
# ** 5. register does not show comments.
<
2010/1/1 x
a 1 ; comment

View File

@ -79,3 +79,14 @@ $ hledger -ftimedot:- print
(g) 0.25
>=
# ** 5. Lines can be commented by a leading # or ;, indented or not.
<
2024-01-01
;a
;b
#c
#d
e
$ hledger -ftimedot:- accounts
e