From 46d3eaf920f2e6b99157a9ed6886640a645a98fb Mon Sep 17 00:00:00 2001 From: Simon Michael Date: Wed, 23 Jun 2021 14:12:17 -1000 Subject: [PATCH] ;lib: clarify/extend/refactor some line parsing helpers (#1580) --- hledger-lib/Hledger/Read/Common.hs | 67 +++++++++++++++++------ hledger-lib/Hledger/Read/JournalReader.hs | 2 +- hledger-lib/Hledger/Utils/Parse.hs | 9 ++- 3 files changed, 57 insertions(+), 21 deletions(-) diff --git a/hledger-lib/Hledger/Read/Common.hs b/hledger-lib/Hledger/Read/Common.hs index ad2008e29..3b106c926 100644 --- a/hledger-lib/Hledger/Read/Common.hs +++ b/hledger-lib/Hledger/Read/Common.hs @@ -95,9 +95,10 @@ module Hledger.Read.Common ( rawnumberp, -- ** comments + isLineCommentStart, + isSameLineCommentStart, multilinecommentp, emptyorcommentlinep, - followingcommentp, transactioncommentp, postingcommentp, @@ -106,8 +107,11 @@ module Hledger.Read.Common ( bracketeddatetagsp, -- ** misc - singlespacedtextp, - singlespacedtextsatisfyingp, + noncommenttextp, + noncommenttext1p, + singlespacedtext1p, + singlespacednoncommenttext1p, + singlespacedtextsatisfying1p, singlespacep, skipNonNewlineSpaces, skipNonNewlineSpaces1, @@ -532,9 +536,11 @@ codep = option "" $ do char ')' "closing bracket ')' for transaction code" pure code +-- | Parse possibly empty text until a semicolon or newline. +-- Whitespace is preserved (for now - perhaps helps preserve alignment +-- of same-line comments ?). descriptionp :: TextParser m Text -descriptionp = takeWhileP Nothing (not . semicolonOrNewline) - where semicolonOrNewline c = c == ';' || c == '\n' +descriptionp = noncommenttextp "description" --- *** dates @@ -694,19 +700,32 @@ modifiedaccountnamep = do -- It should have required parts to start with an alphanumeric; -- for now it remains as-is for backwards compatibility. accountnamep :: TextParser m AccountName -accountnamep = singlespacedtextp +accountnamep = singlespacedtext1p +-- | Parse possibly empty text, including whitespace, +-- until a comment start (semicolon) or newline. +noncommenttextp :: TextParser m T.Text +noncommenttextp = takeWhileP Nothing (\c -> not $ isSameLineCommentStart c || isNewline c) --- | Parse any text beginning with a non-whitespace character, until a --- double space or the end of input. --- TODO including characters which normally start a comment (;#) - exclude those ? -singlespacedtextp :: TextParser m T.Text -singlespacedtextp = singlespacedtextsatisfyingp (const True) +-- | Parse non-empty text, including whitespace, +-- until a comment start (semicolon) or newline. +noncommenttext1p :: TextParser m T.Text +noncommenttext1p = takeWhile1P Nothing (\c -> not $ isSameLineCommentStart c || isNewline c) --- | Similar to 'singlespacedtextp', except that the text must only contain --- characters satisfying the given predicate. -singlespacedtextsatisfyingp :: (Char -> Bool) -> TextParser m T.Text -singlespacedtextsatisfyingp pred = do +-- | Parse non-empty, single-spaced text starting and ending with non-whitespace, +-- until a double space or newline. +singlespacedtext1p :: TextParser m T.Text +singlespacedtext1p = singlespacedtextsatisfying1p (const True) + +-- | Parse non-empty, single-spaced text starting and ending with non-whitespace, +-- until a comment start (semicolon), double space, or newline. +singlespacednoncommenttext1p :: TextParser m T.Text +singlespacednoncommenttext1p = singlespacedtextsatisfying1p (not . isSameLineCommentStart) + +-- | Parse non-empty, single-spaced text starting and ending with non-whitespace, +-- where all characters satisfy the given predicate. +singlespacedtextsatisfying1p :: (Char -> Bool) -> TextParser m T.Text +singlespacedtextsatisfying1p pred = do firstPart <- partp otherParts <- many $ try $ singlespacep *> partp pure $! T.unwords $ firstPart : otherParts @@ -1179,13 +1198,27 @@ emptyorcommentlinep = do where skiplinecommentp :: TextParser m () skiplinecommentp = do - satisfy $ \c -> c == ';' || c == '#' || c == '*' - void $ takeWhileP Nothing (\c -> c /= '\n') + satisfy isLineCommentStart + void $ takeWhileP Nothing (/= '\n') optional newline pure () {-# INLINABLE emptyorcommentlinep #-} +-- | Is this a character that, as the first non-whitespace on a line, +-- starts a comment line ? +isLineCommentStart :: Char -> Bool +isLineCommentStart '#' = True +isLineCommentStart '*' = True +isLineCommentStart ';' = True +isLineCommentStart _ = False + +-- | Is this a character that, appearing anywhere within a line, +-- starts a comment ? +isSameLineCommentStart :: Char -> Bool +isSameLineCommentStart ';' = True +isSameLineCommentStart _ = False + -- A parser combinator for parsing (possibly multiline) comments -- following journal items. -- diff --git a/hledger-lib/Hledger/Read/JournalReader.hs b/hledger-lib/Hledger/Read/JournalReader.hs index d03f22a4a..6ae1e205c 100644 --- a/hledger-lib/Hledger/Read/JournalReader.hs +++ b/hledger-lib/Hledger/Read/JournalReader.hs @@ -620,7 +620,7 @@ periodictransactionp = do Nothing -> today Just y -> fromGregorian y 1 1 periodExcerpt <- lift $ excerpt_ $ - singlespacedtextsatisfyingp (\c -> c /= ';' && c /= '\n') + singlespacedtextsatisfying1p (\c -> c /= ';' && c /= '\n') let periodtxt = T.strip $ getExcerptText periodExcerpt -- first parsing with 'singlespacedtextp', then "re-parsing" with diff --git a/hledger-lib/Hledger/Utils/Parse.hs b/hledger-lib/Hledger/Utils/Parse.hs index 82c62195a..5785765c0 100644 --- a/hledger-lib/Hledger/Utils/Parse.hs +++ b/hledger-lib/Hledger/Utils/Parse.hs @@ -19,6 +19,7 @@ module Hledger.Utils.Parse ( parseerror, showDateParseError, nonspace, + isNewline, isNonNewlineSpace, restofline, eolof, @@ -119,13 +120,15 @@ showDateParseError :: (Show t, Show (Token t), Show e) => ParseErrorBundle t e -> String showDateParseError e = printf "date parse error (%s)" (intercalate ", " $ tail $ lines $ show e) +isNewline :: Char -> Bool +isNewline '\n' = True +isNewline _ = False + nonspace :: TextParser m Char nonspace = satisfy (not . isSpace) isNonNewlineSpace :: Char -> Bool -isNonNewlineSpace c = c /= '\n' && isSpace c --- XXX support \r\n ? --- isNonNewlineSpace c = c /= '\n' && c /= '\r' && isSpace c +isNonNewlineSpace c = not (isNewline c) && isSpace c spacenonewline :: (Stream s, Char ~ Token s) => ParsecT CustomErr s m Char spacenonewline = satisfy isNonNewlineSpace