;lib: clarify/extend/refactor some line parsing helpers (#1580)

This commit is contained in:
Simon Michael 2021-06-23 14:12:17 -10:00
parent 424b883541
commit 46d3eaf920
3 changed files with 57 additions and 21 deletions

View File

@ -95,9 +95,10 @@ module Hledger.Read.Common (
rawnumberp, rawnumberp,
-- ** comments -- ** comments
isLineCommentStart,
isSameLineCommentStart,
multilinecommentp, multilinecommentp,
emptyorcommentlinep, emptyorcommentlinep,
followingcommentp, followingcommentp,
transactioncommentp, transactioncommentp,
postingcommentp, postingcommentp,
@ -106,8 +107,11 @@ module Hledger.Read.Common (
bracketeddatetagsp, bracketeddatetagsp,
-- ** misc -- ** misc
singlespacedtextp, noncommenttextp,
singlespacedtextsatisfyingp, noncommenttext1p,
singlespacedtext1p,
singlespacednoncommenttext1p,
singlespacedtextsatisfying1p,
singlespacep, singlespacep,
skipNonNewlineSpaces, skipNonNewlineSpaces,
skipNonNewlineSpaces1, skipNonNewlineSpaces1,
@ -532,9 +536,11 @@ codep = option "" $ do
char ')' <?> "closing bracket ')' for transaction code" char ')' <?> "closing bracket ')' for transaction code"
pure code pure code
-- | Parse possibly empty text until a semicolon or newline.
-- Whitespace is preserved (for now - perhaps helps preserve alignment
-- of same-line comments ?).
descriptionp :: TextParser m Text descriptionp :: TextParser m Text
descriptionp = takeWhileP Nothing (not . semicolonOrNewline) descriptionp = noncommenttextp <?> "description"
where semicolonOrNewline c = c == ';' || c == '\n'
--- *** dates --- *** dates
@ -694,19 +700,32 @@ modifiedaccountnamep = do
-- It should have required parts to start with an alphanumeric; -- It should have required parts to start with an alphanumeric;
-- for now it remains as-is for backwards compatibility. -- for now it remains as-is for backwards compatibility.
accountnamep :: TextParser m AccountName accountnamep :: TextParser m AccountName
accountnamep = singlespacedtextp accountnamep = singlespacedtext1p
-- | Parse possibly empty text, including whitespace,
-- until a comment start (semicolon) or newline.
noncommenttextp :: TextParser m T.Text
noncommenttextp = takeWhileP Nothing (\c -> not $ isSameLineCommentStart c || isNewline c)
-- | Parse any text beginning with a non-whitespace character, until a -- | Parse non-empty text, including whitespace,
-- double space or the end of input. -- until a comment start (semicolon) or newline.
-- TODO including characters which normally start a comment (;#) - exclude those ? noncommenttext1p :: TextParser m T.Text
singlespacedtextp :: TextParser m T.Text noncommenttext1p = takeWhile1P Nothing (\c -> not $ isSameLineCommentStart c || isNewline c)
singlespacedtextp = singlespacedtextsatisfyingp (const True)
-- | Similar to 'singlespacedtextp', except that the text must only contain -- | Parse non-empty, single-spaced text starting and ending with non-whitespace,
-- characters satisfying the given predicate. -- until a double space or newline.
singlespacedtextsatisfyingp :: (Char -> Bool) -> TextParser m T.Text singlespacedtext1p :: TextParser m T.Text
singlespacedtextsatisfyingp pred = do singlespacedtext1p = singlespacedtextsatisfying1p (const True)
-- | Parse non-empty, single-spaced text starting and ending with non-whitespace,
-- until a comment start (semicolon), double space, or newline.
singlespacednoncommenttext1p :: TextParser m T.Text
singlespacednoncommenttext1p = singlespacedtextsatisfying1p (not . isSameLineCommentStart)
-- | Parse non-empty, single-spaced text starting and ending with non-whitespace,
-- where all characters satisfy the given predicate.
singlespacedtextsatisfying1p :: (Char -> Bool) -> TextParser m T.Text
singlespacedtextsatisfying1p pred = do
firstPart <- partp firstPart <- partp
otherParts <- many $ try $ singlespacep *> partp otherParts <- many $ try $ singlespacep *> partp
pure $! T.unwords $ firstPart : otherParts pure $! T.unwords $ firstPart : otherParts
@ -1179,13 +1198,27 @@ emptyorcommentlinep = do
where where
skiplinecommentp :: TextParser m () skiplinecommentp :: TextParser m ()
skiplinecommentp = do skiplinecommentp = do
satisfy $ \c -> c == ';' || c == '#' || c == '*' satisfy isLineCommentStart
void $ takeWhileP Nothing (\c -> c /= '\n') void $ takeWhileP Nothing (/= '\n')
optional newline optional newline
pure () pure ()
{-# INLINABLE emptyorcommentlinep #-} {-# INLINABLE emptyorcommentlinep #-}
-- | Is this a character that, as the first non-whitespace on a line,
-- starts a comment line ?
isLineCommentStart :: Char -> Bool
isLineCommentStart '#' = True
isLineCommentStart '*' = True
isLineCommentStart ';' = True
isLineCommentStart _ = False
-- | Is this a character that, appearing anywhere within a line,
-- starts a comment ?
isSameLineCommentStart :: Char -> Bool
isSameLineCommentStart ';' = True
isSameLineCommentStart _ = False
-- A parser combinator for parsing (possibly multiline) comments -- A parser combinator for parsing (possibly multiline) comments
-- following journal items. -- following journal items.
-- --

View File

@ -620,7 +620,7 @@ periodictransactionp = do
Nothing -> today Nothing -> today
Just y -> fromGregorian y 1 1 Just y -> fromGregorian y 1 1
periodExcerpt <- lift $ excerpt_ $ periodExcerpt <- lift $ excerpt_ $
singlespacedtextsatisfyingp (\c -> c /= ';' && c /= '\n') singlespacedtextsatisfying1p (\c -> c /= ';' && c /= '\n')
let periodtxt = T.strip $ getExcerptText periodExcerpt let periodtxt = T.strip $ getExcerptText periodExcerpt
-- first parsing with 'singlespacedtextp', then "re-parsing" with -- first parsing with 'singlespacedtextp', then "re-parsing" with

View File

@ -19,6 +19,7 @@ module Hledger.Utils.Parse (
parseerror, parseerror,
showDateParseError, showDateParseError,
nonspace, nonspace,
isNewline,
isNonNewlineSpace, isNonNewlineSpace,
restofline, restofline,
eolof, eolof,
@ -119,13 +120,15 @@ showDateParseError
:: (Show t, Show (Token t), Show e) => ParseErrorBundle t e -> String :: (Show t, Show (Token t), Show e) => ParseErrorBundle t e -> String
showDateParseError e = printf "date parse error (%s)" (intercalate ", " $ tail $ lines $ show e) showDateParseError e = printf "date parse error (%s)" (intercalate ", " $ tail $ lines $ show e)
isNewline :: Char -> Bool
isNewline '\n' = True
isNewline _ = False
nonspace :: TextParser m Char nonspace :: TextParser m Char
nonspace = satisfy (not . isSpace) nonspace = satisfy (not . isSpace)
isNonNewlineSpace :: Char -> Bool isNonNewlineSpace :: Char -> Bool
isNonNewlineSpace c = c /= '\n' && isSpace c isNonNewlineSpace c = not (isNewline c) && isSpace c
-- XXX support \r\n ?
-- isNonNewlineSpace c = c /= '\n' && c /= '\r' && isSpace c
spacenonewline :: (Stream s, Char ~ Token s) => ParsecT CustomErr s m Char spacenonewline :: (Stream s, Char ~ Token s) => ParsecT CustomErr s m Char
spacenonewline = satisfy isNonNewlineSpace spacenonewline = satisfy isNonNewlineSpace