;lib: clarify/extend/refactor some line parsing helpers (#1580)
This commit is contained in:
		
							parent
							
								
									424b883541
								
							
						
					
					
						commit
						46d3eaf920
					
				| @ -95,9 +95,10 @@ module Hledger.Read.Common ( | ||||
|   rawnumberp, | ||||
| 
 | ||||
|   -- ** comments | ||||
|   isLineCommentStart, | ||||
|   isSameLineCommentStart, | ||||
|   multilinecommentp, | ||||
|   emptyorcommentlinep, | ||||
| 
 | ||||
|   followingcommentp, | ||||
|   transactioncommentp, | ||||
|   postingcommentp, | ||||
| @ -106,8 +107,11 @@ module Hledger.Read.Common ( | ||||
|   bracketeddatetagsp, | ||||
| 
 | ||||
|   -- ** misc | ||||
|   singlespacedtextp, | ||||
|   singlespacedtextsatisfyingp, | ||||
|   noncommenttextp, | ||||
|   noncommenttext1p, | ||||
|   singlespacedtext1p, | ||||
|   singlespacednoncommenttext1p, | ||||
|   singlespacedtextsatisfying1p, | ||||
|   singlespacep, | ||||
|   skipNonNewlineSpaces, | ||||
|   skipNonNewlineSpaces1, | ||||
| @ -532,9 +536,11 @@ codep = option "" $ do | ||||
|   char ')' <?> "closing bracket ')' for transaction code" | ||||
|   pure code | ||||
| 
 | ||||
| -- | Parse possibly empty text until a semicolon or newline. | ||||
| -- Whitespace is preserved (for now - perhaps helps preserve alignment  | ||||
| -- of same-line comments ?). | ||||
| descriptionp :: TextParser m Text | ||||
| descriptionp = takeWhileP Nothing (not . semicolonOrNewline) | ||||
|   where semicolonOrNewline c = c == ';' || c == '\n' | ||||
| descriptionp = noncommenttextp <?> "description" | ||||
| 
 | ||||
| --- *** dates | ||||
| 
 | ||||
| @ -694,19 +700,32 @@ modifiedaccountnamep = do | ||||
| -- It should have required parts to start with an alphanumeric; | ||||
| -- for now it remains as-is for backwards compatibility. | ||||
| accountnamep :: TextParser m AccountName | ||||
| accountnamep = singlespacedtextp | ||||
| accountnamep = singlespacedtext1p | ||||
| 
 | ||||
| -- | Parse possibly empty text, including whitespace,  | ||||
| -- until a comment start (semicolon) or newline. | ||||
| noncommenttextp :: TextParser m T.Text | ||||
| noncommenttextp = takeWhileP Nothing (\c -> not $ isSameLineCommentStart c || isNewline c) | ||||
| 
 | ||||
| -- | Parse any text beginning with a non-whitespace character, until a | ||||
| -- double space or the end of input. | ||||
| -- TODO including characters which normally start a comment (;#) - exclude those ?  | ||||
| singlespacedtextp :: TextParser m T.Text | ||||
| singlespacedtextp = singlespacedtextsatisfyingp (const True) | ||||
| -- | Parse non-empty text, including whitespace,  | ||||
| -- until a comment start (semicolon) or newline. | ||||
| noncommenttext1p :: TextParser m T.Text | ||||
| noncommenttext1p = takeWhile1P Nothing (\c -> not $ isSameLineCommentStart c || isNewline c) | ||||
| 
 | ||||
| -- | Similar to 'singlespacedtextp', except that the text must only contain | ||||
| -- characters satisfying the given predicate. | ||||
| singlespacedtextsatisfyingp :: (Char -> Bool) -> TextParser m T.Text | ||||
| singlespacedtextsatisfyingp pred = do | ||||
| -- | Parse non-empty, single-spaced text starting and ending with non-whitespace, | ||||
| -- until a double space or newline. | ||||
| singlespacedtext1p :: TextParser m T.Text | ||||
| singlespacedtext1p = singlespacedtextsatisfying1p (const True) | ||||
| 
 | ||||
| -- | Parse non-empty, single-spaced text starting and ending with non-whitespace, | ||||
| -- until a comment start (semicolon), double space, or newline. | ||||
| singlespacednoncommenttext1p :: TextParser m T.Text | ||||
| singlespacednoncommenttext1p = singlespacedtextsatisfying1p (not . isSameLineCommentStart) | ||||
| 
 | ||||
| -- | Parse non-empty, single-spaced text starting and ending with non-whitespace, | ||||
| -- where all characters satisfy the given predicate. | ||||
| singlespacedtextsatisfying1p :: (Char -> Bool) -> TextParser m T.Text | ||||
| singlespacedtextsatisfying1p pred = do | ||||
|   firstPart <- partp | ||||
|   otherParts <- many $ try $ singlespacep *> partp | ||||
|   pure $! T.unwords $ firstPart : otherParts | ||||
| @ -1179,13 +1198,27 @@ emptyorcommentlinep = do | ||||
|   where | ||||
|     skiplinecommentp :: TextParser m () | ||||
|     skiplinecommentp = do | ||||
|       satisfy $ \c -> c == ';' || c == '#' || c == '*' | ||||
|       void $ takeWhileP Nothing (\c -> c /= '\n') | ||||
|       satisfy isLineCommentStart | ||||
|       void $ takeWhileP Nothing (/= '\n') | ||||
|       optional newline | ||||
|       pure () | ||||
| 
 | ||||
| {-# INLINABLE emptyorcommentlinep #-} | ||||
| 
 | ||||
| -- | Is this a character that, as the first non-whitespace on a line, | ||||
| -- starts a comment line ? | ||||
| isLineCommentStart :: Char -> Bool | ||||
| isLineCommentStart '#' = True | ||||
| isLineCommentStart '*' = True | ||||
| isLineCommentStart ';' = True | ||||
| isLineCommentStart _   = False | ||||
| 
 | ||||
| -- | Is this a character that, appearing anywhere within a line, | ||||
| -- starts a comment ? | ||||
| isSameLineCommentStart :: Char -> Bool | ||||
| isSameLineCommentStart ';' = True | ||||
| isSameLineCommentStart _   = False | ||||
| 
 | ||||
| -- A parser combinator for parsing (possibly multiline) comments | ||||
| -- following journal items. | ||||
| -- | ||||
|  | ||||
| @ -620,7 +620,7 @@ periodictransactionp = do | ||||
|                   Nothing -> today | ||||
|                   Just y  -> fromGregorian y 1 1 | ||||
|   periodExcerpt <- lift $ excerpt_ $ | ||||
|                     singlespacedtextsatisfyingp (\c -> c /= ';' && c /= '\n') | ||||
|                     singlespacedtextsatisfying1p (\c -> c /= ';' && c /= '\n') | ||||
|   let periodtxt = T.strip $ getExcerptText periodExcerpt | ||||
| 
 | ||||
|   -- first parsing with 'singlespacedtextp', then "re-parsing" with | ||||
|  | ||||
| @ -19,6 +19,7 @@ module Hledger.Utils.Parse ( | ||||
|   parseerror, | ||||
|   showDateParseError, | ||||
|   nonspace, | ||||
|   isNewline, | ||||
|   isNonNewlineSpace, | ||||
|   restofline, | ||||
|   eolof, | ||||
| @ -119,13 +120,15 @@ showDateParseError | ||||
|   :: (Show t, Show (Token t), Show e) => ParseErrorBundle t e -> String | ||||
| showDateParseError e = printf "date parse error (%s)" (intercalate ", " $ tail $ lines $ show e) | ||||
| 
 | ||||
| isNewline :: Char -> Bool  | ||||
| isNewline '\n' = True | ||||
| isNewline _    = False | ||||
| 
 | ||||
| nonspace :: TextParser m Char | ||||
| nonspace = satisfy (not . isSpace) | ||||
| 
 | ||||
| isNonNewlineSpace :: Char -> Bool | ||||
| isNonNewlineSpace c = c /= '\n' && isSpace c | ||||
| -- XXX support \r\n ? | ||||
| -- isNonNewlineSpace c = c /= '\n' && c /= '\r' && isSpace c | ||||
| isNonNewlineSpace c = not (isNewline c) && isSpace c | ||||
| 
 | ||||
| spacenonewline :: (Stream s, Char ~ Token s) => ParsecT CustomErr s m Char | ||||
| spacenonewline = satisfy isNonNewlineSpace | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user