lib: rewrite comment parsers [API]
- avoid the "re-parsing" of comments using the `match` parser combinator
This commit is contained in:
parent
d707b351cc
commit
9b6558401f
@ -183,6 +183,7 @@ instance NFData PostingType
|
|||||||
type TagName = Text
|
type TagName = Text
|
||||||
type TagValue = Text
|
type TagValue = Text
|
||||||
type Tag = (TagName, TagValue) -- ^ A tag name and (possibly empty) value.
|
type Tag = (TagName, TagValue) -- ^ A tag name and (possibly empty) value.
|
||||||
|
type DateTag = (TagName, Day)
|
||||||
|
|
||||||
-- | The status of a transaction or posting, recorded with a status mark
|
-- | The status of a transaction or posting, recorded with a status mark
|
||||||
-- (nothing, !, or *). What these mean is ultimately user defined.
|
-- (nothing, !, or *). What these mean is ultimately user defined.
|
||||||
|
|||||||
@ -14,6 +14,7 @@ Some of these might belong in Hledger.Read.JournalReader or Hledger.Read.
|
|||||||
|
|
||||||
--- * module
|
--- * module
|
||||||
{-# LANGUAGE CPP, BangPatterns, DeriveDataTypeable, RecordWildCards, NamedFieldPuns, NoMonoLocalBinds, ScopedTypeVariables, FlexibleContexts, TupleSections, OverloadedStrings #-}
|
{-# LANGUAGE CPP, BangPatterns, DeriveDataTypeable, RecordWildCards, NamedFieldPuns, NoMonoLocalBinds, ScopedTypeVariables, FlexibleContexts, TupleSections, OverloadedStrings #-}
|
||||||
|
{-# LANGUAGE TypeFamilies #-}
|
||||||
{-# LANGUAGE LambdaCase #-}
|
{-# LANGUAGE LambdaCase #-}
|
||||||
{-# LANGUAGE PackageImports #-}
|
{-# LANGUAGE PackageImports #-}
|
||||||
|
|
||||||
@ -81,12 +82,10 @@ module Hledger.Read.Common (
|
|||||||
-- ** comments
|
-- ** comments
|
||||||
multilinecommentp,
|
multilinecommentp,
|
||||||
emptyorcommentlinep,
|
emptyorcommentlinep,
|
||||||
followingcommentp,
|
|
||||||
followingcommentandtagsp,
|
|
||||||
|
|
||||||
-- ** tags
|
followingcommentp,
|
||||||
commentTags,
|
transactioncommentp,
|
||||||
tagsp,
|
postingcommentp,
|
||||||
|
|
||||||
-- ** bracketed dates
|
-- ** bracketed dates
|
||||||
bracketeddatetagsp
|
bracketeddatetagsp
|
||||||
@ -98,6 +97,7 @@ import "base-compat-batteries" Prelude.Compat hiding (readFile)
|
|||||||
import "base-compat-batteries" Control.Monad.Compat
|
import "base-compat-batteries" Control.Monad.Compat
|
||||||
import Control.Monad.Except (ExceptT(..), runExceptT, throwError) --, catchError)
|
import Control.Monad.Except (ExceptT(..), runExceptT, throwError) --, catchError)
|
||||||
import Control.Monad.State.Strict
|
import Control.Monad.State.Strict
|
||||||
|
import Data.Bifunctor (bimap, second)
|
||||||
import Data.Char
|
import Data.Char
|
||||||
import Data.Data
|
import Data.Data
|
||||||
import Data.Decimal (DecimalRaw (Decimal), Decimal)
|
import Data.Decimal (DecimalRaw (Decimal), Decimal)
|
||||||
@ -384,6 +384,8 @@ datep' mYear = do
|
|||||||
"partial date "++dateStr++" found, but the current year is unknown"
|
"partial date "++dateStr++" found, but the current year is unknown"
|
||||||
where dateStr = show month ++ [sep] ++ show day
|
where dateStr = show month ++ [sep] ++ show day
|
||||||
|
|
||||||
|
{-# INLINABLE datep' #-}
|
||||||
|
|
||||||
-- | Parse a date and time in YYYY/MM/DD HH:MM[:SS][+-ZZZZ] format.
|
-- | Parse a date and time in YYYY/MM/DD HH:MM[:SS][+-ZZZZ] format.
|
||||||
-- Hyphen (-) and period (.) are also allowed as date separators.
|
-- Hyphen (-) and period (.) are also allowed as date separators.
|
||||||
-- The year may be omitted if a default year has been set.
|
-- The year may be omitted if a default year has been set.
|
||||||
@ -871,211 +873,235 @@ data AmbiguousNumber = AmbiguousNumber DigitGrp Char DigitGrp -- 1,000
|
|||||||
multilinecommentp :: TextParser m ()
|
multilinecommentp :: TextParser m ()
|
||||||
multilinecommentp = startComment *> anyLine `skipManyTill` endComment
|
multilinecommentp = startComment *> anyLine `skipManyTill` endComment
|
||||||
where
|
where
|
||||||
startComment = string "comment" >> skipLine
|
startComment = string "comment" *> trailingSpaces
|
||||||
endComment = eof <|> string "end comment" *> skipLine
|
endComment = eof <|> string "end comment" *> trailingSpaces
|
||||||
|
|
||||||
skipLine = void $ skipMany spacenonewline *> newline
|
trailingSpaces = skipMany spacenonewline <* newline
|
||||||
anyLine = takeWhileP Nothing (\c -> c /= '\n') *> newline
|
anyLine = void $ takeWhileP Nothing (\c -> c /= '\n') *> newline
|
||||||
|
|
||||||
|
{-# INLINABLE multilinecommentp #-}
|
||||||
|
|
||||||
emptyorcommentlinep :: TextParser m ()
|
emptyorcommentlinep :: TextParser m ()
|
||||||
emptyorcommentlinep = do
|
emptyorcommentlinep = do
|
||||||
skipMany spacenonewline
|
skipMany spacenonewline
|
||||||
void linecommentp <|> void newline
|
skiplinecommentp <|> void newline
|
||||||
|
where
|
||||||
|
-- A line (file-level) comment can start with a semicolon, hash, or star
|
||||||
|
-- (allowing org nodes).
|
||||||
|
skiplinecommentp :: TextParser m ()
|
||||||
|
skiplinecommentp = do
|
||||||
|
satisfy $ \c -> c == ';' || c == '#' || c == '*'
|
||||||
|
void $ takeWhileP Nothing (\c -> c /= '\n')
|
||||||
|
optional newline
|
||||||
|
pure ()
|
||||||
|
|
||||||
-- | Parse a possibly multi-line comment following a semicolon.
|
{-# INLINABLE emptyorcommentlinep #-}
|
||||||
followingcommentp :: TextParser m Text
|
|
||||||
followingcommentp = T.unlines . map snd <$> followingcommentlinesp
|
|
||||||
|
|
||||||
followingcommentlinesp :: TextParser m [(SourcePos, Text)]
|
-- A parser combinator for parsing (possibly multiline) comments
|
||||||
followingcommentlinesp = do
|
-- following journal items.
|
||||||
skipMany spacenonewline
|
|
||||||
|
|
||||||
samelineComment@(_, samelineCommentText)
|
|
||||||
<- try commentp <|> (,) <$> (getPosition <* eolof) <*> pure ""
|
|
||||||
newlineComments <- many $ try $ do
|
|
||||||
skipSome spacenonewline -- leading whitespace is required
|
|
||||||
commentp
|
|
||||||
|
|
||||||
if T.null samelineCommentText && null newlineComments
|
|
||||||
then pure []
|
|
||||||
else pure $ samelineComment : newlineComments
|
|
||||||
|
|
||||||
-- | Parse a possibly multi-line comment following a semicolon, and
|
|
||||||
-- any tags and/or posting dates within it. Posting dates can be
|
|
||||||
-- expressed with "date"/"date2" tags and/or bracketed dates. The
|
|
||||||
-- dates are parsed in full here so that errors are reported in the
|
|
||||||
-- right position. Missing years can be inferred if a default date is
|
|
||||||
-- provided.
|
|
||||||
--
|
--
|
||||||
-- >>> rejp (followingcommentandtagsp (Just $ fromGregorian 2000 1 2)) "; a:b, date:3/4, [=5/6]"
|
-- Several journal items may be followed by comments, which begin with
|
||||||
|
-- semicolons and extend to the end of the line. Such comments may span
|
||||||
|
-- multiple lines, but comment lines below the journal item must be
|
||||||
|
-- preceeded by leading whitespace.
|
||||||
|
--
|
||||||
|
-- This parser combinator accepts a parser that consumes all input up
|
||||||
|
-- until the next newline. This parser should extract the "content" from
|
||||||
|
-- comments. The resulting parser returns this content plus the raw text
|
||||||
|
-- of the comment itself.
|
||||||
|
followingcommentp' :: (Monoid a) => TextParser m a -> TextParser m (Text, a)
|
||||||
|
followingcommentp' contentp = do
|
||||||
|
skipMany spacenonewline
|
||||||
|
sameLine <- try headerp *> match' contentp <|> pure ("", mempty)
|
||||||
|
_ <- eolof
|
||||||
|
lowerLines <- many $
|
||||||
|
try (skipSome spacenonewline *> headerp) *> match' contentp <* eolof
|
||||||
|
|
||||||
|
let (textLines, results) = unzip $ sameLine : lowerLines
|
||||||
|
strippedCommentText = T.unlines $ map T.strip textLines
|
||||||
|
result = mconcat results
|
||||||
|
pure (strippedCommentText, result)
|
||||||
|
|
||||||
|
where
|
||||||
|
headerp = char ';' *> skipMany spacenonewline
|
||||||
|
|
||||||
|
{-# INLINABLE followingcommentp' #-}
|
||||||
|
|
||||||
|
-- | Parse the text of a (possibly multiline) comment following a journal
|
||||||
|
-- item.
|
||||||
|
followingcommentp :: TextParser m Text
|
||||||
|
followingcommentp =
|
||||||
|
fst <$> followingcommentp' (void $ takeWhileP Nothing (/= '\n'))
|
||||||
|
{-# INLINABLE followingcommentp #-}
|
||||||
|
|
||||||
|
|
||||||
|
-- | Parse a transaction comment and extract its tags.
|
||||||
|
--
|
||||||
|
-- The first line of a transaction may be followed by comments, which
|
||||||
|
-- begin with semicolons and extend to the end of the line. Transaction
|
||||||
|
-- comments may span multiple lines, but comment lines below the
|
||||||
|
-- transaction must be preceeded by leading whitespace.
|
||||||
|
--
|
||||||
|
-- 2000/1/1 ; a transaction comment starting on the same line ...
|
||||||
|
-- ; extending to the next line
|
||||||
|
-- account1 $1
|
||||||
|
-- account2
|
||||||
|
--
|
||||||
|
-- Tags are name-value pairs.
|
||||||
|
--
|
||||||
|
-- >>> let getTags (_,tags) = tags
|
||||||
|
-- >>> let parseTags = fmap getTags . rtp transactioncommentp
|
||||||
|
--
|
||||||
|
-- >>> parseTags "; name1: val1, name2:all this is value2"
|
||||||
|
-- Right [("name1","val1"),("name2","all this is value2")]
|
||||||
|
--
|
||||||
|
-- A tag's name must be immediately followed by a colon, without
|
||||||
|
-- separating whitespace. The corresponding value consists of all the text
|
||||||
|
-- following the colon up until the next colon or newline, stripped of
|
||||||
|
-- leading and trailing whitespace.
|
||||||
|
--
|
||||||
|
transactioncommentp :: TextParser m (Text, [Tag])
|
||||||
|
transactioncommentp = followingcommentp' commenttagsp
|
||||||
|
{-# INLINABLE transactioncommentp #-}
|
||||||
|
|
||||||
|
commenttagsp :: TextParser m [Tag]
|
||||||
|
commenttagsp = do
|
||||||
|
tagName <- fmap (last . T.split isSpace)
|
||||||
|
$ takeWhileP Nothing (\c -> c /= ':' && c /= '\n')
|
||||||
|
atColon tagName <|> pure [] -- if not ':', then either '\n' or EOF
|
||||||
|
|
||||||
|
where
|
||||||
|
atColon :: Text -> TextParser m [Tag]
|
||||||
|
atColon name = char ':' *> do
|
||||||
|
if T.null name
|
||||||
|
then commenttagsp
|
||||||
|
else do
|
||||||
|
skipMany spacenonewline
|
||||||
|
val <- tagValue
|
||||||
|
let tag = (name, val)
|
||||||
|
(tag:) <$> commenttagsp
|
||||||
|
|
||||||
|
tagValue :: TextParser m Text
|
||||||
|
tagValue = do
|
||||||
|
val <- T.strip <$> takeWhileP Nothing (\c -> c /= ',' && c /= '\n')
|
||||||
|
_ <- optional $ char ','
|
||||||
|
pure val
|
||||||
|
|
||||||
|
{-# INLINABLE commenttagsp #-}
|
||||||
|
|
||||||
|
|
||||||
|
-- | Parse a posting comment and extract its tags and dates.
|
||||||
|
--
|
||||||
|
-- Postings may be followed by comments, which begin with semicolons and
|
||||||
|
-- extend to the end of the line. Posting comments may span multiple
|
||||||
|
-- lines, but comment lines below the posting must be preceeded by
|
||||||
|
-- leading whitespace.
|
||||||
|
--
|
||||||
|
-- 2000/1/1
|
||||||
|
-- account1 $1 ; a posting comment starting on the same line ...
|
||||||
|
-- ; extending to the next line
|
||||||
|
--
|
||||||
|
-- account2
|
||||||
|
-- ; a posting comment beginning on the next line
|
||||||
|
--
|
||||||
|
-- Tags are name-value pairs.
|
||||||
|
--
|
||||||
|
-- >>> let getTags (_,tags,_,_) = tags
|
||||||
|
-- >>> let parseTags = fmap getTags . rtp (postingcommentp Nothing)
|
||||||
|
--
|
||||||
|
-- >>> parseTags "; name1: val1, name2:all this is value2"
|
||||||
|
-- Right [("name1","val1"),("name2","all this is value2")]
|
||||||
|
--
|
||||||
|
-- A tag's name must be immediately followed by a colon, without
|
||||||
|
-- separating whitespace. The corresponding value consists of all the text
|
||||||
|
-- following the colon up until the next colon or newline, stripped of
|
||||||
|
-- leading and trailing whitespace.
|
||||||
|
--
|
||||||
|
-- Posting dates may be expressed with "date"/"date2" tags or with
|
||||||
|
-- bracketed date syntax. Posting dates will inherit their year from the
|
||||||
|
-- transaction date if the year is not specified. We throw parse errors on
|
||||||
|
-- invalid dates.
|
||||||
|
--
|
||||||
|
-- >>> let getDates (_,_,d1,d2) = (d1, d2)
|
||||||
|
-- >>> let parseDates = fmap getDates . rtp (postingcommentp (Just 2000))
|
||||||
|
--
|
||||||
|
-- >>> parseDates "; date: 1/2, date2: 1999/12/31"
|
||||||
|
-- Right (Just 2000-01-02,Just 1999-12-31)
|
||||||
|
-- >>> parseDates "; [1/2=1999/12/31]"
|
||||||
|
-- Right (Just 2000-01-02,Just 1999-12-31)
|
||||||
|
--
|
||||||
|
-- Example: tags, date tags, and bracketed dates
|
||||||
|
-- >>> rtp (postingcommentp (Just 2000)) "; a:b, date:3/4, [=5/6]"
|
||||||
-- Right ("a:b, date:3/4, [=5/6]\n",[("a","b"),("date","3/4")],Just 2000-03-04,Just 2000-05-06)
|
-- Right ("a:b, date:3/4, [=5/6]\n",[("a","b"),("date","3/4")],Just 2000-03-04,Just 2000-05-06)
|
||||||
--
|
--
|
||||||
-- Year unspecified and no default provided -> unknown year error, at correct position:
|
-- Example: extraction of dates from date tags ignores trailing text
|
||||||
-- >>> rejp (followingcommentandtagsp Nothing) " ; xxx date:3/4\n ; second line"
|
-- >>> rtp (postingcommentp (Just 2000)) "; date:3/4=5/6"
|
||||||
-- Left ...1:22...partial date 3/4 found, but the current year is unknown...
|
|
||||||
--
|
|
||||||
-- Date tag value contains trailing text - forgot the comma, confused:
|
|
||||||
-- the syntaxes ? We'll accept the leading date anyway
|
|
||||||
-- >>> rejp (followingcommentandtagsp (Just $ fromGregorian 2000 1 2)) "; date:3/4=5/6"
|
|
||||||
-- Right ("date:3/4=5/6\n",[("date","3/4=5/6")],Just 2000-03-04,Nothing)
|
-- Right ("date:3/4=5/6\n",[("date","3/4=5/6")],Just 2000-03-04,Nothing)
|
||||||
--
|
--
|
||||||
followingcommentandtagsp
|
postingcommentp
|
||||||
:: Monad m
|
:: Maybe Year -> TextParser m (Text, [Tag], Maybe Day, Maybe Day)
|
||||||
=> Maybe Day
|
postingcommentp mYear = do
|
||||||
-> ErroringJournalParser m (Text, [Tag], Maybe Day, Maybe Day)
|
(commentText, (tags, dateTags)) <-
|
||||||
followingcommentandtagsp mdefdate = do
|
followingcommentp' (commenttagsanddatesp mYear)
|
||||||
-- pdbg 0 "followingcommentandtagsp"
|
let mdate = fmap snd $ find ((=="date") .fst) dateTags
|
||||||
|
mdate2 = fmap snd $ find ((=="date2").fst) dateTags
|
||||||
|
pure (commentText, tags, mdate, mdate2)
|
||||||
|
{-# INLINABLE postingcommentp #-}
|
||||||
|
|
||||||
commentLines <- lift followingcommentlinesp
|
|
||||||
-- pdbg 0 $ "commentws:" ++ show commentLines
|
|
||||||
|
|
||||||
-- Reparse the comment for any tags.
|
commenttagsanddatesp
|
||||||
tagsWithPositions <- case
|
:: Maybe Year -> TextParser m ([Tag], [DateTag])
|
||||||
traverse (runTextParserAt tagswithvaluepositions) commentLines of
|
commenttagsanddatesp mYear = do
|
||||||
Right tss -> pure $ concat tss
|
(txt, dateTags) <- match $ readUpTo ':'
|
||||||
Left e -> throwError $ parseErrorPretty e
|
-- next char is either ':' or '\n' (or EOF)
|
||||||
|
let tagName = last (T.split isSpace txt)
|
||||||
-- Extract date-tag style posting dates from the tags.
|
(fmap.second) (dateTags++) (atColon tagName) <|> pure ([], dateTags) -- if not ':', then either '\n' or EOF
|
||||||
-- Use the transaction date for defaults, if provided.
|
|
||||||
let isDateLabel txt = txt == "date" || txt == "date2"
|
|
||||||
isDateTag = isDateLabel . fst . snd
|
|
||||||
tagDates <- case traverse tagDate $ filter isDateTag tagsWithPositions of
|
|
||||||
Right ds -> pure ds
|
|
||||||
Left e -> throwError $ parseErrorPretty e
|
|
||||||
|
|
||||||
-- Reparse the comment for any bracketed style posting dates.
|
|
||||||
-- Use the transaction date for defaults, if provided.
|
|
||||||
bracketedDates <- case
|
|
||||||
traverse (runTextParserAt (bracketedpostingdatesp mdefdate))
|
|
||||||
commentLines of
|
|
||||||
Right dss -> pure $ concat dss
|
|
||||||
Left e -> throwError $ parseErrorPretty e
|
|
||||||
|
|
||||||
let pdates = tagDates ++ bracketedDates
|
|
||||||
mdate = fmap snd $ find ((=="date") .fst) pdates
|
|
||||||
mdate2 = fmap snd $ find ((=="date2").fst) pdates
|
|
||||||
-- pdbg 0 $ "allDates: "++show pdates
|
|
||||||
|
|
||||||
let strippedComment = T.unlines $ map (T.strip . snd) commentLines
|
|
||||||
tags = map snd tagsWithPositions
|
|
||||||
-- pdbg 0 $ "comment:"++show strippedComment
|
|
||||||
|
|
||||||
pure (strippedComment, tags, mdate, mdate2)
|
|
||||||
|
|
||||||
where
|
where
|
||||||
runTextParserAt parser (pos, txt) =
|
readUpTo :: Char -> TextParser m [DateTag]
|
||||||
runTextParser (setPosition pos *> parser) txt
|
readUpTo end = do
|
||||||
|
void $ takeWhileP Nothing (\c -> c /= end && c /= '\n' && c /= '[')
|
||||||
|
-- if not '[' then ':' or '\n' or EOF
|
||||||
|
atBracket (readUpTo end) <|> pure []
|
||||||
|
|
||||||
tagDate :: (SourcePos, Tag)
|
atBracket :: TextParser m [DateTag] -> TextParser m [DateTag]
|
||||||
-> Either (ParseError Char CustomErr) (TagName, Day)
|
atBracket cont = do
|
||||||
tagDate (pos, (name, value)) =
|
-- Uses the fact that bracketed date-tags cannot contain newlines
|
||||||
fmap (name,) $ runTextParserAt (datep' myear) (pos, value)
|
dateTags <- option [] $ lookAhead (bracketeddatetagsp mYear)
|
||||||
where myear = fmap (first3 . toGregorian) mdefdate
|
_ <- char '['
|
||||||
|
dateTags' <- cont
|
||||||
|
pure $ dateTags ++ dateTags'
|
||||||
|
|
||||||
-- A transaction/posting comment must start with a semicolon. This parser
|
atColon :: Text -> TextParser m ([Tag], [DateTag])
|
||||||
-- discards the leading whitespace of the comment and returns the source
|
atColon name = char ':' *> do
|
||||||
-- position of the comment's first non-whitespace character.
|
|
||||||
commentp :: TextParser m (SourcePos, Text)
|
|
||||||
commentp = commentStartingWithp (==';')
|
|
||||||
|
|
||||||
-- A line (file-level) comment can start with a semicolon, hash, or star
|
|
||||||
-- (allowing org nodes). This parser discards the leading whitespace of
|
|
||||||
-- the comment and returns the source position of the comment's first
|
|
||||||
-- non-whitespace character.
|
|
||||||
linecommentp :: TextParser m (SourcePos, Text)
|
|
||||||
linecommentp =
|
|
||||||
commentStartingWithp $ \c -> c == ';' || c == '#' || c == '*'
|
|
||||||
|
|
||||||
commentStartingWithp :: (Char -> Bool) -> TextParser m (SourcePos, Text)
|
|
||||||
commentStartingWithp f = do
|
|
||||||
-- ptrace "commentStartingWith"
|
|
||||||
satisfy f
|
|
||||||
skipMany spacenonewline
|
skipMany spacenonewline
|
||||||
startPos <- getPosition
|
(tags, dateTags) <- case name of
|
||||||
content <- takeWhileP Nothing (\c -> c /= '\n')
|
"" -> pure ([], [])
|
||||||
optional newline
|
"date" -> dateValue name
|
||||||
return (startPos, content)
|
"date2" -> dateValue name
|
||||||
|
_ -> tagValue name
|
||||||
|
_ <- optional $ char ','
|
||||||
|
bimap (tags++) (dateTags++) <$> commenttagsanddatesp mYear
|
||||||
|
|
||||||
--- ** tags
|
dateValue :: Text -> TextParser m ([Tag], [DateTag])
|
||||||
|
dateValue name = do
|
||||||
|
(txt, (date, dateTags)) <- match' $ do
|
||||||
|
date <- datep' mYear
|
||||||
|
dateTags <- readUpTo ','
|
||||||
|
pure (date, dateTags)
|
||||||
|
let val = T.strip txt
|
||||||
|
pure $ ( [(name, val)]
|
||||||
|
, (name, date) : dateTags )
|
||||||
|
|
||||||
-- | Extract any tags (name:value ended by comma or newline) embedded in a string.
|
tagValue :: Text -> TextParser m ([Tag], [DateTag])
|
||||||
--
|
tagValue name = do
|
||||||
-- >>> commentTags "a b:, c:c d:d, e"
|
(txt, dateTags) <- match' $ readUpTo ','
|
||||||
-- [("b",""),("c","c d:d")]
|
let val = T.strip txt
|
||||||
--
|
pure $ ( [(name, val)]
|
||||||
-- >>> commentTags "a [1/1/1] [1/1] [1], [=1/1/1] [=1/1] [=1] [1/1=1/1/1] [1=1/1/1] b:c"
|
, dateTags )
|
||||||
-- [("b","c")]
|
|
||||||
--
|
|
||||||
-- --[("date","1/1/1"),("date","1/1"),("date2","1/1/1"),("date2","1/1"),("date","1/1"),("date2","1/1/1"),("date","1"),("date2","1/1/1")]
|
|
||||||
--
|
|
||||||
-- >>> commentTags "\na b:, \nd:e, f"
|
|
||||||
-- [("b",""),("d","e")]
|
|
||||||
--
|
|
||||||
-- >>> commentTags ":value"
|
|
||||||
-- []
|
|
||||||
--
|
|
||||||
commentTags :: Text -> [Tag]
|
|
||||||
commentTags s = either (const []) id $ runTextParser tagsp s
|
|
||||||
|
|
||||||
-- | Parse all tags found in a string.
|
{-# INLINABLE commenttagsanddatesp #-}
|
||||||
tagsp :: SimpleTextParser [Tag]
|
|
||||||
tagsp = map snd <$> tagswithvaluepositions
|
|
||||||
|
|
||||||
tagswithvaluepositions :: SimpleTextParser [(SourcePos, Tag)]
|
|
||||||
tagswithvaluepositions = do
|
|
||||||
-- pdbg 0 $ "tagsp"
|
|
||||||
|
|
||||||
-- If we parse in a single pass, we cannot know whether some text
|
|
||||||
-- belongs to a tag label until we have reached a colon (in which case
|
|
||||||
-- it does) or whitespace (in which case it does not). Therefore, we
|
|
||||||
-- hold on to the text until we reach such a break point, and then
|
|
||||||
-- decide what to do.
|
|
||||||
|
|
||||||
potentialTagName <- tillNextBreak
|
|
||||||
atSpaceChar <|> atColon potentialTagName <|> atEof
|
|
||||||
|
|
||||||
where
|
|
||||||
|
|
||||||
isBreak :: Char -> Bool
|
|
||||||
isBreak c = isSpace c || c == ':'
|
|
||||||
|
|
||||||
tillNextBreak :: SimpleTextParser Text
|
|
||||||
tillNextBreak = takeWhileP Nothing (not . isBreak)
|
|
||||||
|
|
||||||
tagValue :: SimpleTextParser Text
|
|
||||||
tagValue = T.strip <$> takeWhileP Nothing (not . commaOrNewline)
|
|
||||||
where commaOrNewline c = c == ',' || c == '\n'
|
|
||||||
|
|
||||||
atSpaceChar :: SimpleTextParser [(SourcePos, Tag)]
|
|
||||||
atSpaceChar = skipSome spaceChar *> tagswithvaluepositions
|
|
||||||
|
|
||||||
atColon :: Text -> SimpleTextParser [(SourcePos, Tag)]
|
|
||||||
atColon tagName = do
|
|
||||||
char ':'
|
|
||||||
if T.null tagName
|
|
||||||
then tagswithvaluepositions
|
|
||||||
else do
|
|
||||||
pos <- getPosition
|
|
||||||
tagVal <- tagValue
|
|
||||||
let tag = (pos, (tagName, tagVal))
|
|
||||||
tags <- tagswithvaluepositions
|
|
||||||
pure $ tag : tags
|
|
||||||
|
|
||||||
atEof :: SimpleTextParser [(SourcePos, Tag)]
|
|
||||||
atEof = eof *> pure []
|
|
||||||
|
|
||||||
--- ** posting dates
|
|
||||||
|
|
||||||
-- | Parse all bracketed posting dates found in a string. The dates are
|
|
||||||
-- parsed fully to give useful errors. Missing years can be inferred only
|
|
||||||
-- if a default date is provided.
|
|
||||||
--
|
|
||||||
bracketedpostingdatesp :: Maybe Day -> SimpleTextParser [(TagName,Day)]
|
|
||||||
bracketedpostingdatesp mdefdate = do
|
|
||||||
-- pdbg 0 $ "bracketedpostingdatesp"
|
|
||||||
skipMany $ notChar '['
|
|
||||||
concat <$> sepEndBy (bracketeddatetagsp mdefdate <|> char '[' *> pure [])
|
|
||||||
(skipMany $ notChar '[')
|
|
||||||
|
|
||||||
--- ** bracketed dates
|
--- ** bracketed dates
|
||||||
|
|
||||||
@ -1105,8 +1131,9 @@ bracketedpostingdatesp mdefdate = do
|
|||||||
-- >>> either (Left . parseErrorPretty) Right $ rtp (bracketeddatetagsp Nothing) "[0123456789/-.=/-.=]"
|
-- >>> either (Left . parseErrorPretty) Right $ rtp (bracketeddatetagsp Nothing) "[0123456789/-.=/-.=]"
|
||||||
-- Left ...1:13:...expecting month or day...
|
-- Left ...1:13:...expecting month or day...
|
||||||
--
|
--
|
||||||
bracketeddatetagsp :: Maybe Day -> SimpleTextParser [(TagName, Day)]
|
bracketeddatetagsp
|
||||||
bracketeddatetagsp mdefdate = do
|
:: Maybe Year -> TextParser m [(TagName, Day)]
|
||||||
|
bracketeddatetagsp mYear1 = do
|
||||||
-- pdbg 0 "bracketeddatetagsp"
|
-- pdbg 0 "bracketeddatetagsp"
|
||||||
try $ do
|
try $ do
|
||||||
s <- lookAhead
|
s <- lookAhead
|
||||||
@ -1117,14 +1144,24 @@ bracketeddatetagsp mdefdate = do
|
|||||||
-- Looks sufficiently like a bracketed date to commit to parsing a date
|
-- Looks sufficiently like a bracketed date to commit to parsing a date
|
||||||
|
|
||||||
between (char '[') (char ']') $ do
|
between (char '[') (char ']') $ do
|
||||||
let myear1 = fmap readYear mdefdate
|
md1 <- optional $ datep' mYear1
|
||||||
md1 <- optional $ datep' myear1
|
|
||||||
|
|
||||||
let myear2 = fmap readYear md1 <|> myear1
|
let mYear2 = fmap readYear md1 <|> mYear1
|
||||||
md2 <- optional $ char '=' *> datep' myear2
|
md2 <- optional $ char '=' *> datep' mYear2
|
||||||
|
|
||||||
pure $ catMaybes [("date",) <$> md1, ("date2",) <$> md2]
|
pure $ catMaybes [("date",) <$> md1, ("date2",) <$> md2]
|
||||||
|
|
||||||
where
|
where
|
||||||
readYear = first3 . toGregorian
|
readYear = first3 . toGregorian
|
||||||
isBracketedDateChar c = isDigit c || isDateSepChar c || c == '='
|
isBracketedDateChar c = isDigit c || isDateSepChar c || c == '='
|
||||||
|
|
||||||
|
{-# INLINABLE bracketeddatetagsp #-}
|
||||||
|
|
||||||
|
|
||||||
|
--- ** helper parsers
|
||||||
|
|
||||||
|
-- A version of `match` that is strict in the returned text
|
||||||
|
match' :: TextParser m a -> TextParser m (Text, a)
|
||||||
|
match' p = do
|
||||||
|
(!txt, p) <- match p
|
||||||
|
pure (txt, p)
|
||||||
|
|||||||
@ -248,12 +248,7 @@ accountdirectivep = do
|
|||||||
macode' :: Maybe String <- (optional $ lift $ skipSome spacenonewline >> some digitChar)
|
macode' :: Maybe String <- (optional $ lift $ skipSome spacenonewline >> some digitChar)
|
||||||
let macode :: Maybe AccountCode = read <$> macode'
|
let macode :: Maybe AccountCode = read <$> macode'
|
||||||
newline
|
newline
|
||||||
_tags <- many $ do
|
skipMany indentedlinep
|
||||||
startpos <- getPosition
|
|
||||||
l <- indentedlinep
|
|
||||||
case runTextParser (setPosition startpos >> tagsp) $ T.pack l of
|
|
||||||
Right ts -> return ts
|
|
||||||
Left _e -> return [] -- TODO throwError $ parseErrorPretty e
|
|
||||||
|
|
||||||
modify' (\j -> j{jaccounts = (acct, macode) : jaccounts j})
|
modify' (\j -> j{jaccounts = (acct, macode) : jaccounts j})
|
||||||
|
|
||||||
@ -477,9 +472,9 @@ transactionp = do
|
|||||||
status <- lift statusp <?> "cleared status"
|
status <- lift statusp <?> "cleared status"
|
||||||
code <- lift codep <?> "transaction code"
|
code <- lift codep <?> "transaction code"
|
||||||
description <- T.strip <$> descriptionp
|
description <- T.strip <$> descriptionp
|
||||||
comment <- lift followingcommentp
|
(comment, tags) <- lift transactioncommentp
|
||||||
let tags = commentTags comment
|
let year = first3 $ toGregorian date
|
||||||
postings <- postingsp (Just date)
|
postings <- postingsp (Just year)
|
||||||
pos' <- getPosition
|
pos' <- getPosition
|
||||||
let sourcepos = journalSourcePos pos pos'
|
let sourcepos = journalSourcePos pos pos'
|
||||||
return $ txnTieKnot $ Transaction 0 sourcepos date edate status code description comment tags postings ""
|
return $ txnTieKnot $ Transaction 0 sourcepos date edate status code description comment tags postings ""
|
||||||
@ -581,8 +576,8 @@ test_transactionp = do
|
|||||||
|
|
||||||
-- Parse the following whitespace-beginning lines as postings, posting
|
-- Parse the following whitespace-beginning lines as postings, posting
|
||||||
-- tags, and/or comments (inferring year, if needed, from the given date).
|
-- tags, and/or comments (inferring year, if needed, from the given date).
|
||||||
postingsp :: MonadIO m => Maybe Day -> ErroringJournalParser m [Posting]
|
postingsp :: MonadIO m => Maybe Year -> ErroringJournalParser m [Posting]
|
||||||
postingsp mdate = many (postingp mdate) <?> "postings"
|
postingsp mTransactionYear = many (postingp mTransactionYear) <?> "postings"
|
||||||
|
|
||||||
-- linebeginningwithspaces :: Monad m => JournalParser m String
|
-- linebeginningwithspaces :: Monad m => JournalParser m String
|
||||||
-- linebeginningwithspaces = do
|
-- linebeginningwithspaces = do
|
||||||
@ -591,8 +586,8 @@ postingsp mdate = many (postingp mdate) <?> "postings"
|
|||||||
-- cs <- lift restofline
|
-- cs <- lift restofline
|
||||||
-- return $ sp ++ (c:cs) ++ "\n"
|
-- return $ sp ++ (c:cs) ++ "\n"
|
||||||
|
|
||||||
postingp :: MonadIO m => Maybe Day -> ErroringJournalParser m Posting
|
postingp :: MonadIO m => Maybe Year -> ErroringJournalParser m Posting
|
||||||
postingp mtdate = do
|
postingp mTransactionYear = do
|
||||||
-- pdbg 0 "postingp"
|
-- pdbg 0 "postingp"
|
||||||
(status, account) <- try $ do
|
(status, account) <- try $ do
|
||||||
lift (skipSome spacenonewline)
|
lift (skipSome spacenonewline)
|
||||||
@ -605,7 +600,7 @@ postingp mtdate = do
|
|||||||
massertion <- partialbalanceassertionp
|
massertion <- partialbalanceassertionp
|
||||||
_ <- fixedlotpricep
|
_ <- fixedlotpricep
|
||||||
lift (skipMany spacenonewline)
|
lift (skipMany spacenonewline)
|
||||||
(comment,tags,mdate,mdate2) <- followingcommentandtagsp mtdate
|
(comment,tags,mdate,mdate2) <- lift $ postingcommentp mTransactionYear
|
||||||
return posting
|
return posting
|
||||||
{ pdate=mdate
|
{ pdate=mdate
|
||||||
, pdate2=mdate2
|
, pdate2=mdate2
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user