;lib: clarify/extend/refactor some line parsing helpers (#1580)
This commit is contained in:
		
							parent
							
								
									424b883541
								
							
						
					
					
						commit
						46d3eaf920
					
				| @ -95,9 +95,10 @@ module Hledger.Read.Common ( | |||||||
|   rawnumberp, |   rawnumberp, | ||||||
| 
 | 
 | ||||||
|   -- ** comments |   -- ** comments | ||||||
|  |   isLineCommentStart, | ||||||
|  |   isSameLineCommentStart, | ||||||
|   multilinecommentp, |   multilinecommentp, | ||||||
|   emptyorcommentlinep, |   emptyorcommentlinep, | ||||||
| 
 |  | ||||||
|   followingcommentp, |   followingcommentp, | ||||||
|   transactioncommentp, |   transactioncommentp, | ||||||
|   postingcommentp, |   postingcommentp, | ||||||
| @ -106,8 +107,11 @@ module Hledger.Read.Common ( | |||||||
|   bracketeddatetagsp, |   bracketeddatetagsp, | ||||||
| 
 | 
 | ||||||
|   -- ** misc |   -- ** misc | ||||||
|   singlespacedtextp, |   noncommenttextp, | ||||||
|   singlespacedtextsatisfyingp, |   noncommenttext1p, | ||||||
|  |   singlespacedtext1p, | ||||||
|  |   singlespacednoncommenttext1p, | ||||||
|  |   singlespacedtextsatisfying1p, | ||||||
|   singlespacep, |   singlespacep, | ||||||
|   skipNonNewlineSpaces, |   skipNonNewlineSpaces, | ||||||
|   skipNonNewlineSpaces1, |   skipNonNewlineSpaces1, | ||||||
| @ -532,9 +536,11 @@ codep = option "" $ do | |||||||
|   char ')' <?> "closing bracket ')' for transaction code" |   char ')' <?> "closing bracket ')' for transaction code" | ||||||
|   pure code |   pure code | ||||||
| 
 | 
 | ||||||
|  | -- | Parse possibly empty text until a semicolon or newline. | ||||||
|  | -- Whitespace is preserved (for now - perhaps helps preserve alignment  | ||||||
|  | -- of same-line comments ?). | ||||||
| descriptionp :: TextParser m Text | descriptionp :: TextParser m Text | ||||||
| descriptionp = takeWhileP Nothing (not . semicolonOrNewline) | descriptionp = noncommenttextp <?> "description" | ||||||
|   where semicolonOrNewline c = c == ';' || c == '\n' |  | ||||||
| 
 | 
 | ||||||
| --- *** dates | --- *** dates | ||||||
| 
 | 
 | ||||||
| @ -694,19 +700,32 @@ modifiedaccountnamep = do | |||||||
| -- It should have required parts to start with an alphanumeric; | -- It should have required parts to start with an alphanumeric; | ||||||
| -- for now it remains as-is for backwards compatibility. | -- for now it remains as-is for backwards compatibility. | ||||||
| accountnamep :: TextParser m AccountName | accountnamep :: TextParser m AccountName | ||||||
| accountnamep = singlespacedtextp | accountnamep = singlespacedtext1p | ||||||
| 
 | 
 | ||||||
|  | -- | Parse possibly empty text, including whitespace,  | ||||||
|  | -- until a comment start (semicolon) or newline. | ||||||
|  | noncommenttextp :: TextParser m T.Text | ||||||
|  | noncommenttextp = takeWhileP Nothing (\c -> not $ isSameLineCommentStart c || isNewline c) | ||||||
| 
 | 
 | ||||||
| -- | Parse any text beginning with a non-whitespace character, until a | -- | Parse non-empty text, including whitespace,  | ||||||
| -- double space or the end of input. | -- until a comment start (semicolon) or newline. | ||||||
| -- TODO including characters which normally start a comment (;#) - exclude those ?  | noncommenttext1p :: TextParser m T.Text | ||||||
| singlespacedtextp :: TextParser m T.Text | noncommenttext1p = takeWhile1P Nothing (\c -> not $ isSameLineCommentStart c || isNewline c) | ||||||
| singlespacedtextp = singlespacedtextsatisfyingp (const True) |  | ||||||
| 
 | 
 | ||||||
| -- | Similar to 'singlespacedtextp', except that the text must only contain | -- | Parse non-empty, single-spaced text starting and ending with non-whitespace, | ||||||
| -- characters satisfying the given predicate. | -- until a double space or newline. | ||||||
| singlespacedtextsatisfyingp :: (Char -> Bool) -> TextParser m T.Text | singlespacedtext1p :: TextParser m T.Text | ||||||
| singlespacedtextsatisfyingp pred = do | singlespacedtext1p = singlespacedtextsatisfying1p (const True) | ||||||
|  | 
 | ||||||
|  | -- | Parse non-empty, single-spaced text starting and ending with non-whitespace, | ||||||
|  | -- until a comment start (semicolon), double space, or newline. | ||||||
|  | singlespacednoncommenttext1p :: TextParser m T.Text | ||||||
|  | singlespacednoncommenttext1p = singlespacedtextsatisfying1p (not . isSameLineCommentStart) | ||||||
|  | 
 | ||||||
|  | -- | Parse non-empty, single-spaced text starting and ending with non-whitespace, | ||||||
|  | -- where all characters satisfy the given predicate. | ||||||
|  | singlespacedtextsatisfying1p :: (Char -> Bool) -> TextParser m T.Text | ||||||
|  | singlespacedtextsatisfying1p pred = do | ||||||
|   firstPart <- partp |   firstPart <- partp | ||||||
|   otherParts <- many $ try $ singlespacep *> partp |   otherParts <- many $ try $ singlespacep *> partp | ||||||
|   pure $! T.unwords $ firstPart : otherParts |   pure $! T.unwords $ firstPart : otherParts | ||||||
| @ -1179,13 +1198,27 @@ emptyorcommentlinep = do | |||||||
|   where |   where | ||||||
|     skiplinecommentp :: TextParser m () |     skiplinecommentp :: TextParser m () | ||||||
|     skiplinecommentp = do |     skiplinecommentp = do | ||||||
|       satisfy $ \c -> c == ';' || c == '#' || c == '*' |       satisfy isLineCommentStart | ||||||
|       void $ takeWhileP Nothing (\c -> c /= '\n') |       void $ takeWhileP Nothing (/= '\n') | ||||||
|       optional newline |       optional newline | ||||||
|       pure () |       pure () | ||||||
| 
 | 
 | ||||||
| {-# INLINABLE emptyorcommentlinep #-} | {-# INLINABLE emptyorcommentlinep #-} | ||||||
| 
 | 
 | ||||||
|  | -- | Is this a character that, as the first non-whitespace on a line, | ||||||
|  | -- starts a comment line ? | ||||||
|  | isLineCommentStart :: Char -> Bool | ||||||
|  | isLineCommentStart '#' = True | ||||||
|  | isLineCommentStart '*' = True | ||||||
|  | isLineCommentStart ';' = True | ||||||
|  | isLineCommentStart _   = False | ||||||
|  | 
 | ||||||
|  | -- | Is this a character that, appearing anywhere within a line, | ||||||
|  | -- starts a comment ? | ||||||
|  | isSameLineCommentStart :: Char -> Bool | ||||||
|  | isSameLineCommentStart ';' = True | ||||||
|  | isSameLineCommentStart _   = False | ||||||
|  | 
 | ||||||
| -- A parser combinator for parsing (possibly multiline) comments | -- A parser combinator for parsing (possibly multiline) comments | ||||||
| -- following journal items. | -- following journal items. | ||||||
| -- | -- | ||||||
|  | |||||||
| @ -620,7 +620,7 @@ periodictransactionp = do | |||||||
|                   Nothing -> today |                   Nothing -> today | ||||||
|                   Just y  -> fromGregorian y 1 1 |                   Just y  -> fromGregorian y 1 1 | ||||||
|   periodExcerpt <- lift $ excerpt_ $ |   periodExcerpt <- lift $ excerpt_ $ | ||||||
|                     singlespacedtextsatisfyingp (\c -> c /= ';' && c /= '\n') |                     singlespacedtextsatisfying1p (\c -> c /= ';' && c /= '\n') | ||||||
|   let periodtxt = T.strip $ getExcerptText periodExcerpt |   let periodtxt = T.strip $ getExcerptText periodExcerpt | ||||||
| 
 | 
 | ||||||
|   -- first parsing with 'singlespacedtextp', then "re-parsing" with |   -- first parsing with 'singlespacedtextp', then "re-parsing" with | ||||||
|  | |||||||
| @ -19,6 +19,7 @@ module Hledger.Utils.Parse ( | |||||||
|   parseerror, |   parseerror, | ||||||
|   showDateParseError, |   showDateParseError, | ||||||
|   nonspace, |   nonspace, | ||||||
|  |   isNewline, | ||||||
|   isNonNewlineSpace, |   isNonNewlineSpace, | ||||||
|   restofline, |   restofline, | ||||||
|   eolof, |   eolof, | ||||||
| @ -119,13 +120,15 @@ showDateParseError | |||||||
|   :: (Show t, Show (Token t), Show e) => ParseErrorBundle t e -> String |   :: (Show t, Show (Token t), Show e) => ParseErrorBundle t e -> String | ||||||
| showDateParseError e = printf "date parse error (%s)" (intercalate ", " $ tail $ lines $ show e) | showDateParseError e = printf "date parse error (%s)" (intercalate ", " $ tail $ lines $ show e) | ||||||
| 
 | 
 | ||||||
|  | isNewline :: Char -> Bool  | ||||||
|  | isNewline '\n' = True | ||||||
|  | isNewline _    = False | ||||||
|  | 
 | ||||||
| nonspace :: TextParser m Char | nonspace :: TextParser m Char | ||||||
| nonspace = satisfy (not . isSpace) | nonspace = satisfy (not . isSpace) | ||||||
| 
 | 
 | ||||||
| isNonNewlineSpace :: Char -> Bool | isNonNewlineSpace :: Char -> Bool | ||||||
| isNonNewlineSpace c = c /= '\n' && isSpace c | isNonNewlineSpace c = not (isNewline c) && isSpace c | ||||||
| -- XXX support \r\n ? |  | ||||||
| -- isNonNewlineSpace c = c /= '\n' && c /= '\r' && isSpace c |  | ||||||
| 
 | 
 | ||||||
| spacenonewline :: (Stream s, Char ~ Token s) => ParsecT CustomErr s m Char | spacenonewline :: (Stream s, Char ~ Token s) => ParsecT CustomErr s m Char | ||||||
| spacenonewline = satisfy isNonNewlineSpace | spacenonewline = satisfy isNonNewlineSpace | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user