timedot: rewrite the parser, making it more usable

Now, org headlines before the first day entry are ignored,
regardless of content.

Note, blank lines inside a day entry are not allowed, currently.

It's now easier to be both valid journal and valid timedot at the same
time, so guessing the format of stdin is unreliable, and some tests
are failing. See following commit.
This commit is contained in:
Simon Michael 2020-02-29 09:11:56 -08:00
parent 26c19c65b0
commit 32eb839eac

View File

@ -51,14 +51,14 @@ import Control.Monad.Except (ExceptT)
import Control.Monad.State.Strict import Control.Monad.State.Strict
import Data.Char (isSpace) import Data.Char (isSpace)
import Data.List (foldl') import Data.List (foldl')
import Data.Maybe
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as T import qualified Data.Text as T
import Data.Time (Day)
import Text.Megaparsec hiding (parse) import Text.Megaparsec hiding (parse)
import Text.Megaparsec.Char import Text.Megaparsec.Char
import Hledger.Data import Hledger.Data
import Hledger.Read.Common import Hledger.Read.Common hiding (emptyorcommentlinep)
import Hledger.Utils import Hledger.Utils
-- ** reader -- ** reader
@ -73,37 +73,56 @@ reader = Reader
-- | Parse and post-process a "Journal" from the timedot format, or give an error. -- | Parse and post-process a "Journal" from the timedot format, or give an error.
parse :: InputOpts -> FilePath -> Text -> ExceptT String IO Journal parse :: InputOpts -> FilePath -> Text -> ExceptT String IO Journal
parse = parseAndFinaliseJournal' timedotfilep parse = parseAndFinaliseJournal' timedotp
-- ** utilities -- ** utilities
traceparse :: Monad m => a -> m a traceparse :: String -> TextParser m ()
traceparse = return traceparse = const $ return ()
-- traceparse :: String -> JournalParser m () -- traceparse = traceParse -- for debugging
-- traceparse = lift.traceParse
-- ** parsers -- ** parsers
{-
Rough grammar for timedot format:
timedotfilep :: JournalParser m ParsedJournal timedot: preamble day*
timedotfilep = do many timedotfileitemp preamble: (emptyline | commentline | orgheading)*
eof orgheading: orgheadingprefix restofline
get day: dateline entry* (emptyline | commentline)*
dateline: orgheadingprefix? date description?
orgheadingprefix: star+ space+
description: restofline ; till semicolon?
entry: orgheadingprefix? space* singlespaced (doublespace quantity?)?
doublespace: space space+
quantity: (dot (dot | space)* | number | number unit)
timedotfileitemp :: JournalParser m () Date lines and item lines can begin with an org heading prefix, which is ignored.
timedotfileitemp = do Org headings before the first date line are ignored, regardless of content.
traceparse "timedotfileitemp" -}
choice [
try $ void $ lift emptyorcommentlinep'
,try timedotdayp >>= \ts -> modify' (addTransactions ts)
,lift $ skipSome anySingle >> eolof -- an initial line not beginning with a date, ignore
] <?> "timedot day entry, or default year or comment line or blank line"
addTransactions :: [Transaction] -> Journal -> Journal timedotfilep = timedotp -- XXX rename export above
addTransactions ts j = foldl' (flip ($)) j (map addTransaction ts)
emptyorcommentlinep' = optional orgheadingprefixp >> emptyorcommentlinep timedotp :: JournalParser m ParsedJournal
timedotp = preamblep >> many dayp >> eof >> get
orgheadingprefixp = skipSome (char '*') >> skipSome spacenonewline preamblep :: JournalParser m ()
preamblep = do
lift $ traceparse "preamblep"
void $ many $ notFollowedBy datelinep >> (lift $ emptyorcommentlinep "#;*")
-- | XXX new comment line parser, move to Hledger.Read.Common.emptyorcommentlinep
-- Parse empty lines, all-blank lines, and lines beginning with any of the provided
-- comment-beginning characters.
emptyorcommentlinep :: [Char] -> TextParser m ()
emptyorcommentlinep cs =
label ("empty line or comment line beginning with "++cs) $ do
traceparse "emptyorcommentlinep" -- XXX possible to combine label and traceparse ?
skipMany spacenonewline
void newline <|> void commentp
where
commentp = do
choice (map (some.char) cs)
takeWhileP Nothing (/='\n') <* newline
-- | Parse timedot day entries to zero or more time transactions for that day. -- | Parse timedot day entries to zero or more time transactions for that day.
-- @ -- @
@ -112,30 +131,50 @@ orgheadingprefixp = skipSome (char '*') >> skipSome spacenonewline
-- biz.research . -- biz.research .
-- inc.client1 .... .... .... .... .... .... -- inc.client1 .... .... .... .... .... ....
-- @ -- @
timedotdayp :: JournalParser m [Transaction] dayp :: JournalParser m ()
timedotdayp = do dayp = label "timedot day entry" $ do
traceparse " timedotdayp" lift $ traceparse "dayp"
(d,desc) <- datelinep
ts <- many entryp
let ts' = map (\t -> t{tdate=d, tdescription=desc}) ts
modify' $ addTransactions ts'
void $ many $
(lift $ emptyorcommentlinep "#;") <|> orgnondatelinep
where
addTransactions :: [Transaction] -> Journal -> Journal
addTransactions ts j = foldl' (flip ($)) j (map addTransaction ts)
datelinep :: JournalParser m (Day,Text)
datelinep = do
lift $ traceparse "datelinep"
lift $ optional orgheadingprefixp lift $ optional orgheadingprefixp
d <- datep d <- datep
daydesc <- strip <$> lift restofline desc <- strip <$> lift restofline
es <- catMaybes <$> many (const Nothing <$> try (lift emptyorcommentlinep') <|> return (d, T.pack desc)
Just <$> (notFollowedBy datep >> timedotentryp))
return $ map (\t -> t{tdate=d, tdescription=T.pack daydesc}) es -- <$> many timedotentryp orgnondatelinep :: JournalParser m ()
orgnondatelinep = do
lift $ traceparse "orgnondatelinep"
notFollowedBy datelinep
lift orgheadingprefixp
void $ lift restofline
orgheadingprefixp = skipSome (char '*') >> skipSome spacenonewline
-- | Parse a single timedot entry to one (dateless) transaction. -- | Parse a single timedot entry to one (dateless) transaction.
-- @ -- @
-- fos.haskell .... .. -- fos.haskell .... ..
-- @ -- @
timedotentryp :: JournalParser m Transaction entryp :: JournalParser m Transaction
timedotentryp = do entryp = do
traceparse " timedotentryp" lift $ traceparse " entryp"
pos <- genericSourcePos <$> getSourcePos pos <- genericSourcePos <$> getSourcePos
lift $ optional $ choice [orgheadingprefixp, skipSome spacenonewline] lift $ optional $ choice [orgheadingprefixp, skipSome spacenonewline]
a <- modifiedaccountnamep a <- modifiedaccountnamep
lift (skipMany spacenonewline) lift (skipMany spacenonewline)
hours <- hours <-
try (lift followingcommentp >> return 0) try (lift followingcommentp >> return 0)
<|> (timedotdurationp <* <|> (durationp <*
(try (lift followingcommentp) <|> (newline >> return ""))) (try (lift followingcommentp) <|> (newline >> return "")))
let t = nulltransaction{ let t = nulltransaction{
tsourcepos = pos, tsourcepos = pos,
@ -150,8 +189,8 @@ timedotentryp = do
} }
return t return t
timedotdurationp :: JournalParser m Quantity durationp :: JournalParser m Quantity
timedotdurationp = try timedotnumericp <|> timedotdotsp durationp = try numericquantityp <|> dotquantityp
-- | Parse a duration of seconds, minutes, hours, days, weeks, months or years, -- | Parse a duration of seconds, minutes, hours, days, weeks, months or years,
-- written as a decimal number followed by s, m, h, d, w, mo or y, assuming h -- written as a decimal number followed by s, m, h, d, w, mo or y, assuming h
@ -162,8 +201,8 @@ timedotdurationp = try timedotnumericp <|> timedotdotsp
-- 1.5h -- 1.5h
-- 90m -- 90m
-- @ -- @
timedotnumericp :: JournalParser m Quantity numericquantityp :: JournalParser m Quantity
timedotnumericp = do numericquantityp = do
(q, _, _, _) <- lift $ numberp Nothing (q, _, _, _) <- lift $ numberp Nothing
msymbol <- optional $ choice $ map (string . fst) timeUnits msymbol <- optional $ choice $ map (string . fst) timeUnits
lift (skipMany spacenonewline) lift (skipMany spacenonewline)
@ -191,7 +230,7 @@ timeUnits =
-- @ -- @
-- .... .. -- .... ..
-- @ -- @
timedotdotsp :: JournalParser m Quantity dotquantityp :: JournalParser m Quantity
timedotdotsp = do dotquantityp = do
dots <- filter (not.isSpace) <$> many (oneOf (". " :: [Char])) dots <- filter (not.isSpace) <$> many (oneOf (". " :: [Char]))
return $ (/4) $ fromIntegral $ length dots return $ (/4) $ fromIntegral $ length dots