lib: rewrite comment parsers [API]
- avoid the "re-parsing" of comments using the `match` parser combinator
This commit is contained in:
		
							parent
							
								
									d707b351cc
								
							
						
					
					
						commit
						9b6558401f
					
				| @ -183,6 +183,7 @@ instance NFData PostingType | |||||||
| type TagName = Text | type TagName = Text | ||||||
| type TagValue = Text | type TagValue = Text | ||||||
| type Tag = (TagName, TagValue)  -- ^ A tag name and (possibly empty) value. | type Tag = (TagName, TagValue)  -- ^ A tag name and (possibly empty) value. | ||||||
|  | type DateTag = (TagName, Day) | ||||||
| 
 | 
 | ||||||
| -- | The status of a transaction or posting, recorded with a status mark | -- | The status of a transaction or posting, recorded with a status mark | ||||||
| -- (nothing, !, or *). What these mean is ultimately user defined. | -- (nothing, !, or *). What these mean is ultimately user defined. | ||||||
|  | |||||||
| @ -14,6 +14,7 @@ Some of these might belong in Hledger.Read.JournalReader or Hledger.Read. | |||||||
| 
 | 
 | ||||||
| --- * module | --- * module | ||||||
| {-# LANGUAGE CPP, BangPatterns, DeriveDataTypeable, RecordWildCards, NamedFieldPuns, NoMonoLocalBinds, ScopedTypeVariables, FlexibleContexts, TupleSections, OverloadedStrings #-} | {-# LANGUAGE CPP, BangPatterns, DeriveDataTypeable, RecordWildCards, NamedFieldPuns, NoMonoLocalBinds, ScopedTypeVariables, FlexibleContexts, TupleSections, OverloadedStrings #-} | ||||||
|  | {-# LANGUAGE TypeFamilies #-} | ||||||
| {-# LANGUAGE LambdaCase #-} | {-# LANGUAGE LambdaCase #-} | ||||||
| {-# LANGUAGE PackageImports #-} | {-# LANGUAGE PackageImports #-} | ||||||
| 
 | 
 | ||||||
| @ -81,12 +82,10 @@ module Hledger.Read.Common ( | |||||||
|   -- ** comments |   -- ** comments | ||||||
|   multilinecommentp, |   multilinecommentp, | ||||||
|   emptyorcommentlinep, |   emptyorcommentlinep, | ||||||
|   followingcommentp, |  | ||||||
|   followingcommentandtagsp, |  | ||||||
| 
 | 
 | ||||||
|   -- ** tags |   followingcommentp, | ||||||
|   commentTags, |   transactioncommentp, | ||||||
|   tagsp, |   postingcommentp, | ||||||
| 
 | 
 | ||||||
|   -- ** bracketed dates |   -- ** bracketed dates | ||||||
|   bracketeddatetagsp |   bracketeddatetagsp | ||||||
| @ -98,6 +97,7 @@ import "base-compat-batteries" Prelude.Compat hiding (readFile) | |||||||
| import "base-compat-batteries" Control.Monad.Compat | import "base-compat-batteries" Control.Monad.Compat | ||||||
| import Control.Monad.Except (ExceptT(..), runExceptT, throwError) --, catchError) | import Control.Monad.Except (ExceptT(..), runExceptT, throwError) --, catchError) | ||||||
| import Control.Monad.State.Strict | import Control.Monad.State.Strict | ||||||
|  | import Data.Bifunctor (bimap, second) | ||||||
| import Data.Char | import Data.Char | ||||||
| import Data.Data | import Data.Data | ||||||
| import Data.Decimal (DecimalRaw (Decimal), Decimal) | import Data.Decimal (DecimalRaw (Decimal), Decimal) | ||||||
| @ -384,6 +384,8 @@ datep' mYear = do | |||||||
|       "partial date "++dateStr++" found, but the current year is unknown" |       "partial date "++dateStr++" found, but the current year is unknown" | ||||||
|       where dateStr = show month ++ [sep] ++ show day |       where dateStr = show month ++ [sep] ++ show day | ||||||
| 
 | 
 | ||||||
|  | {-# INLINABLE datep' #-} | ||||||
|  | 
 | ||||||
| -- | Parse a date and time in YYYY/MM/DD HH:MM[:SS][+-ZZZZ] format. | -- | Parse a date and time in YYYY/MM/DD HH:MM[:SS][+-ZZZZ] format. | ||||||
| -- Hyphen (-) and period (.) are also allowed as date separators. | -- Hyphen (-) and period (.) are also allowed as date separators. | ||||||
| -- The year may be omitted if a default year has been set. | -- The year may be omitted if a default year has been set. | ||||||
| @ -871,211 +873,235 @@ data AmbiguousNumber = AmbiguousNumber DigitGrp Char DigitGrp  -- 1,000 | |||||||
| multilinecommentp :: TextParser m () | multilinecommentp :: TextParser m () | ||||||
| multilinecommentp = startComment *> anyLine `skipManyTill` endComment | multilinecommentp = startComment *> anyLine `skipManyTill` endComment | ||||||
|   where |   where | ||||||
|     startComment = string "comment" >> skipLine |     startComment = string "comment" *> trailingSpaces | ||||||
|     endComment = eof <|> string "end comment" *> skipLine |     endComment = eof <|> string "end comment" *> trailingSpaces | ||||||
| 
 | 
 | ||||||
|     skipLine = void $ skipMany spacenonewline *> newline |     trailingSpaces = skipMany spacenonewline <* newline | ||||||
|     anyLine = takeWhileP Nothing (\c -> c /= '\n') *> newline |     anyLine = void $ takeWhileP Nothing (\c -> c /= '\n') *> newline | ||||||
|  | 
 | ||||||
|  | {-# INLINABLE multilinecommentp #-} | ||||||
| 
 | 
 | ||||||
| emptyorcommentlinep :: TextParser m () | emptyorcommentlinep :: TextParser m () | ||||||
| emptyorcommentlinep = do | emptyorcommentlinep = do | ||||||
|   skipMany spacenonewline |   skipMany spacenonewline | ||||||
|   void linecommentp <|> void newline |   skiplinecommentp <|> void newline | ||||||
|  |   where | ||||||
|  |     -- A line (file-level) comment can start with a semicolon, hash, or star | ||||||
|  |     -- (allowing org nodes). | ||||||
|  |     skiplinecommentp :: TextParser m () | ||||||
|  |     skiplinecommentp = do | ||||||
|  |       satisfy $ \c -> c == ';' || c == '#' || c == '*' | ||||||
|  |       void $ takeWhileP Nothing (\c -> c /= '\n') | ||||||
|  |       optional newline | ||||||
|  |       pure () | ||||||
| 
 | 
 | ||||||
| -- | Parse a possibly multi-line comment following a semicolon. | {-# INLINABLE emptyorcommentlinep #-} | ||||||
| followingcommentp :: TextParser m Text |  | ||||||
| followingcommentp = T.unlines . map snd <$> followingcommentlinesp |  | ||||||
| 
 | 
 | ||||||
| followingcommentlinesp :: TextParser m [(SourcePos, Text)] | -- A parser combinator for parsing (possibly multiline) comments | ||||||
| followingcommentlinesp = do | -- following journal items. | ||||||
|   skipMany spacenonewline |  | ||||||
| 
 |  | ||||||
|   samelineComment@(_, samelineCommentText) |  | ||||||
|     <- try commentp <|> (,) <$> (getPosition <* eolof) <*> pure "" |  | ||||||
|   newlineComments <- many $ try $ do |  | ||||||
|     skipSome spacenonewline -- leading whitespace is required |  | ||||||
|     commentp |  | ||||||
| 
 |  | ||||||
|   if T.null samelineCommentText && null newlineComments |  | ||||||
|     then pure [] |  | ||||||
|     else pure $ samelineComment : newlineComments |  | ||||||
| 
 |  | ||||||
| -- | Parse a possibly multi-line comment following a semicolon, and |  | ||||||
| -- any tags and/or posting dates within it. Posting dates can be |  | ||||||
| -- expressed with "date"/"date2" tags and/or bracketed dates.  The |  | ||||||
| -- dates are parsed in full here so that errors are reported in the |  | ||||||
| -- right position. Missing years can be inferred if a default date is |  | ||||||
| -- provided. |  | ||||||
| -- | -- | ||||||
| -- >>> rejp (followingcommentandtagsp (Just $ fromGregorian 2000 1 2)) "; a:b, date:3/4, [=5/6]" | -- Several journal items may be followed by comments, which begin with | ||||||
|  | -- semicolons and extend to the end of the line. Such comments may span | ||||||
|  | -- multiple lines, but comment lines below the journal item must be | ||||||
|  | -- preceeded by leading whitespace. | ||||||
|  | -- | ||||||
|  | -- This parser combinator accepts a parser that consumes all input up | ||||||
|  | -- until the next newline. This parser should extract the "content" from | ||||||
|  | -- comments. The resulting parser returns this content plus the raw text | ||||||
|  | -- of the comment itself. | ||||||
|  | followingcommentp' :: (Monoid a) => TextParser m a -> TextParser m (Text, a) | ||||||
|  | followingcommentp' contentp = do | ||||||
|  |   skipMany spacenonewline | ||||||
|  |   sameLine <- try headerp *> match' contentp <|> pure ("", mempty) | ||||||
|  |   _ <- eolof | ||||||
|  |   lowerLines <- many $ | ||||||
|  |     try (skipSome spacenonewline *> headerp) *> match' contentp <* eolof | ||||||
|  | 
 | ||||||
|  |   let (textLines, results) = unzip $ sameLine : lowerLines | ||||||
|  |       strippedCommentText = T.unlines $ map T.strip textLines | ||||||
|  |       result = mconcat results | ||||||
|  |   pure (strippedCommentText, result) | ||||||
|  | 
 | ||||||
|  |   where | ||||||
|  |     headerp = char ';' *> skipMany spacenonewline | ||||||
|  | 
 | ||||||
|  | {-# INLINABLE followingcommentp' #-} | ||||||
|  | 
 | ||||||
|  | -- | Parse the text of a (possibly multiline) comment following a journal | ||||||
|  | -- item. | ||||||
|  | followingcommentp :: TextParser m Text | ||||||
|  | followingcommentp = | ||||||
|  |   fst <$> followingcommentp' (void $ takeWhileP Nothing (/= '\n')) | ||||||
|  | {-# INLINABLE followingcommentp #-} | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- | Parse a transaction comment and extract its tags. | ||||||
|  | -- | ||||||
|  | -- The first line of a transaction may be followed by comments, which | ||||||
|  | -- begin with semicolons and extend to the end of the line. Transaction | ||||||
|  | -- comments may span multiple lines, but comment lines below the | ||||||
|  | -- transaction must be preceeded by leading whitespace. | ||||||
|  | -- | ||||||
|  | -- 2000/1/1 ; a transaction comment starting on the same line ... | ||||||
|  | --   ; extending to the next line | ||||||
|  | --   account1  $1 | ||||||
|  | --   account2 | ||||||
|  | -- | ||||||
|  | -- Tags are name-value pairs. | ||||||
|  | -- | ||||||
|  | -- >>> let getTags (_,tags) = tags | ||||||
|  | -- >>> let parseTags = fmap getTags . rtp transactioncommentp | ||||||
|  | -- | ||||||
|  | -- >>> parseTags "; name1: val1, name2:all this is value2" | ||||||
|  | -- Right [("name1","val1"),("name2","all this is value2")] | ||||||
|  | -- | ||||||
|  | -- A tag's name must be immediately followed by a colon, without | ||||||
|  | -- separating whitespace. The corresponding value consists of all the text | ||||||
|  | -- following the colon up until the next colon or newline, stripped of | ||||||
|  | -- leading and trailing whitespace. | ||||||
|  | -- | ||||||
|  | transactioncommentp :: TextParser m (Text, [Tag]) | ||||||
|  | transactioncommentp = followingcommentp' commenttagsp | ||||||
|  | {-# INLINABLE transactioncommentp #-} | ||||||
|  | 
 | ||||||
|  | commenttagsp :: TextParser m [Tag] | ||||||
|  | commenttagsp = do | ||||||
|  |   tagName <- fmap (last . T.split isSpace) | ||||||
|  |             $ takeWhileP Nothing (\c -> c /= ':' && c /= '\n') | ||||||
|  |   atColon tagName <|> pure [] -- if not ':', then either '\n' or EOF | ||||||
|  | 
 | ||||||
|  |   where | ||||||
|  |     atColon :: Text -> TextParser m [Tag] | ||||||
|  |     atColon name = char ':' *> do | ||||||
|  |       if T.null name | ||||||
|  |         then commenttagsp | ||||||
|  |         else do | ||||||
|  |           skipMany spacenonewline | ||||||
|  |           val <- tagValue | ||||||
|  |           let tag = (name, val) | ||||||
|  |           (tag:) <$> commenttagsp | ||||||
|  | 
 | ||||||
|  |     tagValue :: TextParser m Text | ||||||
|  |     tagValue = do | ||||||
|  |       val <- T.strip <$> takeWhileP Nothing (\c -> c /= ',' && c /= '\n') | ||||||
|  |       _ <- optional $ char ',' | ||||||
|  |       pure val | ||||||
|  | 
 | ||||||
|  | {-# INLINABLE commenttagsp #-} | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- | Parse a posting comment and extract its tags and dates. | ||||||
|  | -- | ||||||
|  | -- Postings may be followed by comments, which begin with semicolons and | ||||||
|  | -- extend to the end of the line. Posting comments may span multiple | ||||||
|  | -- lines, but comment lines below the posting must be preceeded by | ||||||
|  | -- leading whitespace. | ||||||
|  | -- | ||||||
|  | -- 2000/1/1 | ||||||
|  | --   account1  $1 ; a posting comment starting on the same line ... | ||||||
|  | --   ; extending to the next line | ||||||
|  | -- | ||||||
|  | --   account2 | ||||||
|  | --   ; a posting comment beginning on the next line | ||||||
|  | -- | ||||||
|  | -- Tags are name-value pairs. | ||||||
|  | -- | ||||||
|  | -- >>> let getTags (_,tags,_,_) = tags | ||||||
|  | -- >>> let parseTags = fmap getTags . rtp (postingcommentp Nothing) | ||||||
|  | -- | ||||||
|  | -- >>> parseTags "; name1: val1, name2:all this is value2" | ||||||
|  | -- Right [("name1","val1"),("name2","all this is value2")] | ||||||
|  | -- | ||||||
|  | -- A tag's name must be immediately followed by a colon, without | ||||||
|  | -- separating whitespace. The corresponding value consists of all the text | ||||||
|  | -- following the colon up until the next colon or newline, stripped of | ||||||
|  | -- leading and trailing whitespace. | ||||||
|  | -- | ||||||
|  | -- Posting dates may be expressed with "date"/"date2" tags or with | ||||||
|  | -- bracketed date syntax. Posting dates will inherit their year from the | ||||||
|  | -- transaction date if the year is not specified. We throw parse errors on | ||||||
|  | -- invalid dates. | ||||||
|  | -- | ||||||
|  | -- >>> let getDates (_,_,d1,d2) = (d1, d2) | ||||||
|  | -- >>> let parseDates = fmap getDates . rtp (postingcommentp (Just 2000)) | ||||||
|  | -- | ||||||
|  | -- >>> parseDates "; date: 1/2, date2: 1999/12/31" | ||||||
|  | -- Right (Just 2000-01-02,Just 1999-12-31) | ||||||
|  | -- >>> parseDates "; [1/2=1999/12/31]" | ||||||
|  | -- Right (Just 2000-01-02,Just 1999-12-31) | ||||||
|  | -- | ||||||
|  | -- Example: tags, date tags, and bracketed dates | ||||||
|  | -- >>> rtp (postingcommentp (Just 2000)) "; a:b, date:3/4, [=5/6]" | ||||||
| -- Right ("a:b, date:3/4, [=5/6]\n",[("a","b"),("date","3/4")],Just 2000-03-04,Just 2000-05-06) | -- Right ("a:b, date:3/4, [=5/6]\n",[("a","b"),("date","3/4")],Just 2000-03-04,Just 2000-05-06) | ||||||
| -- | -- | ||||||
| -- Year unspecified and no default provided -> unknown year error, at correct position: | -- Example: extraction of dates from date tags ignores trailing text | ||||||
| -- >>> rejp (followingcommentandtagsp Nothing) "  ;    xxx   date:3/4\n  ; second line" | -- >>> rtp (postingcommentp (Just 2000)) "; date:3/4=5/6" | ||||||
| -- Left ...1:22...partial date 3/4 found, but the current year is unknown... |  | ||||||
| -- |  | ||||||
| -- Date tag value contains trailing text - forgot the comma, confused: |  | ||||||
| -- the syntaxes ?  We'll accept the leading date anyway |  | ||||||
| -- >>> rejp (followingcommentandtagsp (Just $ fromGregorian 2000 1 2)) "; date:3/4=5/6" |  | ||||||
| -- Right ("date:3/4=5/6\n",[("date","3/4=5/6")],Just 2000-03-04,Nothing) | -- Right ("date:3/4=5/6\n",[("date","3/4=5/6")],Just 2000-03-04,Nothing) | ||||||
| -- | -- | ||||||
| followingcommentandtagsp | postingcommentp | ||||||
|   :: Monad m |   :: Maybe Year -> TextParser m (Text, [Tag], Maybe Day, Maybe Day) | ||||||
|   => Maybe Day | postingcommentp mYear = do | ||||||
|   -> ErroringJournalParser m (Text, [Tag], Maybe Day, Maybe Day) |   (commentText, (tags, dateTags)) <- | ||||||
| followingcommentandtagsp mdefdate = do |     followingcommentp' (commenttagsanddatesp mYear) | ||||||
|   -- pdbg 0 "followingcommentandtagsp" |   let mdate  = fmap snd $ find ((=="date") .fst) dateTags | ||||||
|  |       mdate2 = fmap snd $ find ((=="date2").fst) dateTags | ||||||
|  |   pure (commentText, tags, mdate, mdate2) | ||||||
|  | {-# INLINABLE postingcommentp #-} | ||||||
| 
 | 
 | ||||||
|   commentLines <- lift followingcommentlinesp |  | ||||||
|   -- pdbg 0 $ "commentws:" ++ show commentLines |  | ||||||
| 
 | 
 | ||||||
|   -- Reparse the comment for any tags. | commenttagsanddatesp | ||||||
|   tagsWithPositions <- case |   :: Maybe Year -> TextParser m ([Tag], [DateTag]) | ||||||
|     traverse (runTextParserAt tagswithvaluepositions) commentLines of | commenttagsanddatesp mYear = do | ||||||
|       Right tss -> pure $ concat tss |   (txt, dateTags) <- match $ readUpTo ':' | ||||||
|       Left e    -> throwError $ parseErrorPretty e |   -- next char is either ':' or '\n' (or EOF) | ||||||
| 
 |   let tagName = last (T.split isSpace txt) | ||||||
|   -- Extract date-tag style posting dates from the tags. |   (fmap.second) (dateTags++) (atColon tagName) <|> pure ([], dateTags) -- if not ':', then either '\n' or EOF | ||||||
|   -- Use the transaction date for defaults, if provided. |  | ||||||
|   let isDateLabel txt = txt == "date" || txt == "date2" |  | ||||||
|       isDateTag = isDateLabel . fst . snd |  | ||||||
|   tagDates <- case traverse tagDate $ filter isDateTag tagsWithPositions of |  | ||||||
|       Right ds -> pure ds |  | ||||||
|       Left e   -> throwError $ parseErrorPretty e |  | ||||||
| 
 |  | ||||||
|   -- Reparse the comment for any bracketed style posting dates. |  | ||||||
|   -- Use the transaction date for defaults, if provided. |  | ||||||
|   bracketedDates <- case |  | ||||||
|     traverse (runTextParserAt (bracketedpostingdatesp mdefdate)) |  | ||||||
|              commentLines of |  | ||||||
|       Right dss -> pure $ concat dss |  | ||||||
|       Left e    -> throwError $ parseErrorPretty e |  | ||||||
| 
 |  | ||||||
|   let pdates = tagDates ++ bracketedDates |  | ||||||
|       mdate  = fmap snd $ find ((=="date") .fst) pdates |  | ||||||
|       mdate2 = fmap snd $ find ((=="date2").fst) pdates |  | ||||||
|   -- pdbg 0 $ "allDates: "++show pdates |  | ||||||
| 
 |  | ||||||
|   let strippedComment = T.unlines $ map (T.strip . snd) commentLines |  | ||||||
|       tags = map snd tagsWithPositions |  | ||||||
|   -- pdbg 0 $ "comment:"++show strippedComment |  | ||||||
| 
 |  | ||||||
|   pure (strippedComment, tags, mdate, mdate2) |  | ||||||
| 
 | 
 | ||||||
|   where |   where | ||||||
|     runTextParserAt parser (pos, txt) = |     readUpTo :: Char -> TextParser m [DateTag] | ||||||
|       runTextParser (setPosition pos *> parser) txt |     readUpTo end = do | ||||||
|  |       void $ takeWhileP Nothing (\c -> c /= end && c /= '\n' && c /= '[') | ||||||
|  |       -- if not '[' then ':' or '\n' or EOF | ||||||
|  |       atBracket (readUpTo end) <|> pure [] | ||||||
| 
 | 
 | ||||||
|     tagDate :: (SourcePos, Tag) |     atBracket :: TextParser m [DateTag] -> TextParser m [DateTag] | ||||||
|             -> Either (ParseError Char CustomErr) (TagName, Day) |     atBracket cont = do | ||||||
|     tagDate (pos, (name, value)) = |       -- Uses the fact that bracketed date-tags cannot contain newlines | ||||||
|       fmap (name,) $ runTextParserAt (datep' myear) (pos, value) |       dateTags <- option [] $ lookAhead (bracketeddatetagsp mYear) | ||||||
|       where myear = fmap (first3 . toGregorian) mdefdate |       _ <- char '[' | ||||||
|  |       dateTags' <- cont | ||||||
|  |       pure $ dateTags ++ dateTags' | ||||||
| 
 | 
 | ||||||
| -- A transaction/posting comment must start with a semicolon. This parser |     atColon :: Text -> TextParser m ([Tag], [DateTag]) | ||||||
| -- discards the leading whitespace of the comment and returns the source |     atColon name = char ':' *> do | ||||||
| -- position of the comment's first non-whitespace character. |       skipMany spacenonewline | ||||||
| commentp :: TextParser m (SourcePos, Text) |       (tags, dateTags) <- case name of | ||||||
| commentp = commentStartingWithp (==';') |         ""      -> pure ([], []) | ||||||
|  |         "date"  -> dateValue name | ||||||
|  |         "date2" -> dateValue name | ||||||
|  |         _       -> tagValue name | ||||||
|  |       _ <- optional $ char ',' | ||||||
|  |       bimap (tags++) (dateTags++) <$> commenttagsanddatesp mYear | ||||||
| 
 | 
 | ||||||
| -- A line (file-level) comment can start with a semicolon, hash, or star |     dateValue :: Text -> TextParser m ([Tag], [DateTag]) | ||||||
| -- (allowing org nodes). This parser discards the leading whitespace of |     dateValue name = do | ||||||
| -- the comment and returns the source position of the comment's first |       (txt, (date, dateTags)) <- match' $ do | ||||||
| -- non-whitespace character. |         date <- datep' mYear | ||||||
| linecommentp :: TextParser m (SourcePos, Text) |         dateTags <- readUpTo ',' | ||||||
| linecommentp = |         pure (date, dateTags) | ||||||
|   commentStartingWithp $ \c -> c == ';' || c == '#' || c == '*' |       let val = T.strip txt | ||||||
|  |       pure $ ( [(name, val)] | ||||||
|  |              , (name, date) : dateTags ) | ||||||
| 
 | 
 | ||||||
| commentStartingWithp :: (Char -> Bool) -> TextParser m (SourcePos, Text) |     tagValue :: Text -> TextParser m ([Tag], [DateTag]) | ||||||
| commentStartingWithp f = do |     tagValue name = do | ||||||
|   -- ptrace "commentStartingWith" |       (txt, dateTags) <- match' $ readUpTo ',' | ||||||
|   satisfy f |       let val = T.strip txt | ||||||
|   skipMany spacenonewline |       pure $ ( [(name, val)] | ||||||
|   startPos <- getPosition |              , dateTags ) | ||||||
|   content <- takeWhileP Nothing (\c -> c /= '\n') |  | ||||||
|   optional newline |  | ||||||
|   return (startPos, content) |  | ||||||
| 
 | 
 | ||||||
| --- ** tags | {-# INLINABLE commenttagsanddatesp #-} | ||||||
| 
 | 
 | ||||||
| -- | Extract any tags (name:value ended by comma or newline) embedded in a string. |  | ||||||
| -- |  | ||||||
| -- >>> commentTags "a b:, c:c d:d, e" |  | ||||||
| -- [("b",""),("c","c d:d")] |  | ||||||
| -- |  | ||||||
| -- >>> commentTags "a [1/1/1] [1/1] [1], [=1/1/1] [=1/1] [=1] [1/1=1/1/1] [1=1/1/1] b:c" |  | ||||||
| -- [("b","c")] |  | ||||||
| -- |  | ||||||
| -- --[("date","1/1/1"),("date","1/1"),("date2","1/1/1"),("date2","1/1"),("date","1/1"),("date2","1/1/1"),("date","1"),("date2","1/1/1")] |  | ||||||
| -- |  | ||||||
| -- >>> commentTags "\na b:, \nd:e, f" |  | ||||||
| -- [("b",""),("d","e")] |  | ||||||
| -- |  | ||||||
| -- >>> commentTags ":value" |  | ||||||
| -- [] |  | ||||||
| -- |  | ||||||
| commentTags :: Text -> [Tag] |  | ||||||
| commentTags s = either (const []) id $ runTextParser tagsp s |  | ||||||
| 
 |  | ||||||
| -- | Parse all tags found in a string. |  | ||||||
| tagsp :: SimpleTextParser [Tag] |  | ||||||
| tagsp = map snd <$> tagswithvaluepositions |  | ||||||
| 
 |  | ||||||
| tagswithvaluepositions :: SimpleTextParser [(SourcePos, Tag)] |  | ||||||
| tagswithvaluepositions = do |  | ||||||
|   -- pdbg 0 $ "tagsp" |  | ||||||
| 
 |  | ||||||
|   -- If we parse in a single pass, we cannot know whether some text |  | ||||||
|   -- belongs to a tag label until we have reached a colon (in which case |  | ||||||
|   -- it does) or whitespace (in which case it does not). Therefore, we |  | ||||||
|   -- hold on to the text until we reach such a break point, and then |  | ||||||
|   -- decide what to do. |  | ||||||
| 
 |  | ||||||
|   potentialTagName <- tillNextBreak |  | ||||||
|   atSpaceChar <|> atColon potentialTagName <|> atEof |  | ||||||
| 
 |  | ||||||
|   where |  | ||||||
| 
 |  | ||||||
|     isBreak :: Char -> Bool |  | ||||||
|     isBreak c = isSpace c || c == ':' |  | ||||||
| 
 |  | ||||||
|     tillNextBreak :: SimpleTextParser Text |  | ||||||
|     tillNextBreak = takeWhileP Nothing (not . isBreak) |  | ||||||
| 
 |  | ||||||
|     tagValue :: SimpleTextParser Text |  | ||||||
|     tagValue = T.strip <$> takeWhileP Nothing (not . commaOrNewline) |  | ||||||
|       where commaOrNewline c = c == ',' || c == '\n' |  | ||||||
| 
 |  | ||||||
|     atSpaceChar :: SimpleTextParser [(SourcePos, Tag)] |  | ||||||
|     atSpaceChar = skipSome spaceChar *> tagswithvaluepositions |  | ||||||
| 
 |  | ||||||
|     atColon :: Text -> SimpleTextParser [(SourcePos, Tag)] |  | ||||||
|     atColon tagName = do |  | ||||||
|       char ':' |  | ||||||
|       if T.null tagName |  | ||||||
|         then tagswithvaluepositions |  | ||||||
|         else do |  | ||||||
|           pos <- getPosition |  | ||||||
|           tagVal <- tagValue |  | ||||||
|           let tag = (pos, (tagName, tagVal)) |  | ||||||
|           tags <- tagswithvaluepositions |  | ||||||
|           pure $ tag : tags |  | ||||||
| 
 |  | ||||||
|     atEof :: SimpleTextParser [(SourcePos, Tag)] |  | ||||||
|     atEof = eof *> pure [] |  | ||||||
| 
 |  | ||||||
| --- ** posting dates |  | ||||||
| 
 |  | ||||||
| -- | Parse all bracketed posting dates found in a string. The dates are |  | ||||||
| -- parsed fully to give useful errors. Missing years can be inferred only |  | ||||||
| -- if a default date is provided. |  | ||||||
| -- |  | ||||||
| bracketedpostingdatesp :: Maybe Day -> SimpleTextParser [(TagName,Day)] |  | ||||||
| bracketedpostingdatesp mdefdate = do |  | ||||||
|   -- pdbg 0 $ "bracketedpostingdatesp" |  | ||||||
|   skipMany $ notChar '[' |  | ||||||
|   concat <$> sepEndBy (bracketeddatetagsp mdefdate <|> char '[' *> pure []) |  | ||||||
|                       (skipMany $ notChar '[') |  | ||||||
| 
 | 
 | ||||||
| --- ** bracketed dates | --- ** bracketed dates | ||||||
| 
 | 
 | ||||||
| @ -1105,8 +1131,9 @@ bracketedpostingdatesp mdefdate = do | |||||||
| -- >>> either (Left . parseErrorPretty) Right $ rtp (bracketeddatetagsp Nothing) "[0123456789/-.=/-.=]" | -- >>> either (Left . parseErrorPretty) Right $ rtp (bracketeddatetagsp Nothing) "[0123456789/-.=/-.=]" | ||||||
| -- Left ...1:13:...expecting month or day... | -- Left ...1:13:...expecting month or day... | ||||||
| -- | -- | ||||||
| bracketeddatetagsp :: Maybe Day -> SimpleTextParser [(TagName, Day)] | bracketeddatetagsp | ||||||
| bracketeddatetagsp mdefdate = do |   :: Maybe Year -> TextParser m [(TagName, Day)] | ||||||
|  | bracketeddatetagsp mYear1 = do | ||||||
|   -- pdbg 0 "bracketeddatetagsp" |   -- pdbg 0 "bracketeddatetagsp" | ||||||
|   try $ do |   try $ do | ||||||
|     s <- lookAhead |     s <- lookAhead | ||||||
| @ -1117,14 +1144,24 @@ bracketeddatetagsp mdefdate = do | |||||||
|   -- Looks sufficiently like a bracketed date to commit to parsing a date |   -- Looks sufficiently like a bracketed date to commit to parsing a date | ||||||
| 
 | 
 | ||||||
|   between (char '[') (char ']') $ do |   between (char '[') (char ']') $ do | ||||||
|     let myear1 = fmap readYear mdefdate |     md1 <- optional $ datep' mYear1 | ||||||
|     md1 <- optional $ datep' myear1 |  | ||||||
| 
 | 
 | ||||||
|     let myear2 = fmap readYear md1 <|> myear1 |     let mYear2 = fmap readYear md1 <|> mYear1 | ||||||
|     md2 <- optional $ char '=' *> datep' myear2 |     md2 <- optional $ char '=' *> datep' mYear2 | ||||||
| 
 | 
 | ||||||
|     pure $ catMaybes [("date",) <$> md1, ("date2",) <$> md2] |     pure $ catMaybes [("date",) <$> md1, ("date2",) <$> md2] | ||||||
| 
 | 
 | ||||||
|   where |   where | ||||||
|     readYear = first3 . toGregorian |     readYear = first3 . toGregorian | ||||||
|     isBracketedDateChar c = isDigit c || isDateSepChar c || c == '=' |     isBracketedDateChar c = isDigit c || isDateSepChar c || c == '=' | ||||||
|  | 
 | ||||||
|  | {-# INLINABLE bracketeddatetagsp #-} | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | --- ** helper parsers | ||||||
|  | 
 | ||||||
|  | -- A version of `match` that is strict in the returned text | ||||||
|  | match' :: TextParser m a -> TextParser m (Text, a) | ||||||
|  | match' p = do | ||||||
|  |   (!txt, p) <- match p | ||||||
|  |   pure (txt, p) | ||||||
|  | |||||||
| @ -248,12 +248,7 @@ accountdirectivep = do | |||||||
|   macode' :: Maybe String <- (optional $ lift $ skipSome spacenonewline >> some digitChar) |   macode' :: Maybe String <- (optional $ lift $ skipSome spacenonewline >> some digitChar) | ||||||
|   let macode :: Maybe AccountCode = read <$> macode' |   let macode :: Maybe AccountCode = read <$> macode' | ||||||
|   newline |   newline | ||||||
|   _tags <- many $ do |   skipMany indentedlinep | ||||||
|     startpos <- getPosition |  | ||||||
|     l <- indentedlinep |  | ||||||
|     case runTextParser (setPosition startpos >> tagsp) $ T.pack l of |  | ||||||
|       Right ts -> return ts |  | ||||||
|       Left _e   -> return [] -- TODO throwError $ parseErrorPretty e |  | ||||||
|      |      | ||||||
|   modify' (\j -> j{jaccounts = (acct, macode) : jaccounts j}) |   modify' (\j -> j{jaccounts = (acct, macode) : jaccounts j}) | ||||||
| 
 | 
 | ||||||
| @ -477,9 +472,9 @@ transactionp = do | |||||||
|   status <- lift statusp <?> "cleared status" |   status <- lift statusp <?> "cleared status" | ||||||
|   code <- lift codep <?> "transaction code" |   code <- lift codep <?> "transaction code" | ||||||
|   description <- T.strip <$> descriptionp |   description <- T.strip <$> descriptionp | ||||||
|   comment <- lift followingcommentp |   (comment, tags) <- lift transactioncommentp | ||||||
|   let tags = commentTags comment |   let year = first3 $ toGregorian date | ||||||
|   postings <- postingsp (Just date) |   postings <- postingsp (Just year) | ||||||
|   pos' <- getPosition |   pos' <- getPosition | ||||||
|   let sourcepos = journalSourcePos pos pos' |   let sourcepos = journalSourcePos pos pos' | ||||||
|   return $ txnTieKnot $ Transaction 0 sourcepos date edate status code description comment tags postings "" |   return $ txnTieKnot $ Transaction 0 sourcepos date edate status code description comment tags postings "" | ||||||
| @ -581,8 +576,8 @@ test_transactionp = do | |||||||
| 
 | 
 | ||||||
| -- Parse the following whitespace-beginning lines as postings, posting | -- Parse the following whitespace-beginning lines as postings, posting | ||||||
| -- tags, and/or comments (inferring year, if needed, from the given date). | -- tags, and/or comments (inferring year, if needed, from the given date). | ||||||
| postingsp :: MonadIO m => Maybe Day -> ErroringJournalParser m [Posting] | postingsp :: MonadIO m => Maybe Year -> ErroringJournalParser m [Posting] | ||||||
| postingsp mdate = many (postingp mdate) <?> "postings" | postingsp mTransactionYear = many (postingp mTransactionYear) <?> "postings" | ||||||
| 
 | 
 | ||||||
| -- linebeginningwithspaces :: Monad m => JournalParser m String | -- linebeginningwithspaces :: Monad m => JournalParser m String | ||||||
| -- linebeginningwithspaces = do | -- linebeginningwithspaces = do | ||||||
| @ -591,8 +586,8 @@ postingsp mdate = many (postingp mdate) <?> "postings" | |||||||
| --   cs <- lift restofline | --   cs <- lift restofline | ||||||
| --   return $ sp ++ (c:cs) ++ "\n" | --   return $ sp ++ (c:cs) ++ "\n" | ||||||
| 
 | 
 | ||||||
| postingp :: MonadIO m => Maybe Day -> ErroringJournalParser m Posting | postingp :: MonadIO m => Maybe Year -> ErroringJournalParser m Posting | ||||||
| postingp mtdate = do | postingp mTransactionYear = do | ||||||
|   -- pdbg 0 "postingp" |   -- pdbg 0 "postingp" | ||||||
|   (status, account) <- try $ do |   (status, account) <- try $ do | ||||||
|     lift (skipSome spacenonewline) |     lift (skipSome spacenonewline) | ||||||
| @ -605,7 +600,7 @@ postingp mtdate = do | |||||||
|   massertion <- partialbalanceassertionp |   massertion <- partialbalanceassertionp | ||||||
|   _ <- fixedlotpricep |   _ <- fixedlotpricep | ||||||
|   lift (skipMany spacenonewline) |   lift (skipMany spacenonewline) | ||||||
|   (comment,tags,mdate,mdate2) <- followingcommentandtagsp mtdate |   (comment,tags,mdate,mdate2) <- lift $ postingcommentp mTransactionYear | ||||||
|   return posting |   return posting | ||||||
|    { pdate=mdate |    { pdate=mdate | ||||||
|    , pdate2=mdate2 |    , pdate2=mdate2 | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user