hledger/hledger-lib/Hledger/Read/TimeclockReader.hs
Simon Michael c89c33b36e lib: textification: parse stream
10% more allocation, but 35% lower maximum residency, and slightly quicker.

hledger -f data/100x100x10.journal stats
<<ghc: 39327768 bytes, 77 GCs, 196834/269496 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.010 elapsed), 0.020 MUT (0.092 elapsed), 0.014 GC (0.119 elapsed) :ghc>>
<<ghc: 42842136 bytes, 84 GCs, 194010/270912 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.009 elapsed), 0.016 MUT (0.029 elapsed), 0.012 GC (0.120 elapsed) :ghc>>

hledger -f data/1000x1000x10.journal stats
<<ghc: 314291440 bytes, 612 GCs, 2070776/6628048 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.000 elapsed), 0.128 MUT (0.144 elapsed), 0.059 GC (0.070 elapsed) :ghc>>
<<ghc: 349558872 bytes, 681 GCs, 1397597/4106384 avg/max bytes residency (7 samples), 11M in use, 0.000 INIT (0.004 elapsed), 0.124 MUT (0.133 elapsed), 0.047 GC (0.053 elapsed) :ghc>>

hledger -f data/10000x1000x10.journal stats
<<ghc: 3070026824 bytes, 5973 GCs, 12698030/62951784 avg/max bytes residency (10 samples), 124M in use, 0.000 INIT (0.002 elapsed), 1.268 MUT (1.354 elapsed), 0.514 GC (0.587 elapsed) :ghc>>
<<ghc: 3424013128 bytes, 6658 GCs, 11405501/41071624 avg/max bytes residency (11 samples), 111M in use, 0.000 INIT (0.001 elapsed), 1.343 MUT (1.406 elapsed), 0.511 GC (0.573 elapsed) :ghc>>

hledger -f data/100000x1000x10.journal stats
<<ghc: 30753387392 bytes, 59811 GCs, 117615462/666703600 avg/max bytes residency (14 samples), 1588M in use, 0.000 INIT (0.000 elapsed), 12.068 MUT (12.238 elapsed), 6.015 GC (7.190 elapsed) :ghc>>
<<ghc: 34306530696 bytes, 66727 GCs, 76806196/414629312 avg/max bytes residency (14 samples), 1009M in use, 0.000 INIT (0.010 elapsed), 14.357 MUT (16.370 elapsed), 5.298 GC (6.534 elapsed) :ghc>>
2016-05-24 19:00:57 -07:00

123 lines
4.2 KiB
Haskell

{-|
A reader for the timeclock file format generated by timeclock.el
(<http://www.emacswiki.org/emacs/TimeClock>). Example:
@
i 2007\/03\/10 12:26:00 hledger
o 2007\/03\/10 17:26:02
@
From timeclock.el 2.6:
@
A timeclock contains data in the form of a single entry per line.
Each entry has the form:
CODE YYYY/MM/DD HH:MM:SS [COMMENT]
CODE is one of: b, h, i, o or O. COMMENT is optional when the code is
i, o or O. The meanings of the codes are:
b Set the current time balance, or \"time debt\". Useful when
archiving old log data, when a debt must be carried forward.
The COMMENT here is the number of seconds of debt.
h Set the required working time for the given day. This must
be the first entry for that day. The COMMENT in this case is
the number of hours in this workday. Floating point amounts
are allowed.
i Clock in. The COMMENT in this case should be the name of the
project worked on.
o Clock out. COMMENT is unnecessary, but can be used to provide
a description of how the period went, for example.
O Final clock out. Whatever project was being worked on, it is
now finished. Useful for creating summary reports.
@
-}
{-# LANGUAGE OverloadedStrings #-}
module Hledger.Read.TimeclockReader (
-- * Reader
reader,
-- * Misc other exports
timeclockfilep,
-- * Tests
tests_Hledger_Read_TimeclockReader
)
where
import Prelude ()
import Prelude.Compat
import Control.Monad
import Control.Monad.IO.Class (liftIO)
import Control.Monad.Except (ExceptT)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import qualified Data.Text as T
import Test.HUnit
import Text.Parsec hiding (parse)
import System.FilePath
import Hledger.Data
-- XXX too much reuse ?
import Hledger.Read.Common
import Hledger.Utils
reader :: Reader
reader = Reader format detect parse
format :: String
format = "timeclock"
-- | Does the given file path and data look like it might be timeclock.el's timeclock format ?
detect :: FilePath -> Text -> Bool
detect f t
| f /= "-" = takeExtension f == '.':format -- from a known file name: yes if the extension is this format's name
| otherwise = regexMatches "(^|\n)[io] " $ T.unpack t -- from stdin: yes if any line starts with "i " or "o "
-- | Parse and post-process a "Journal" from timeclock.el's timeclock
-- format, saving the provided file path and the current time, or give an
-- error.
parse :: Maybe FilePath -> Bool -> FilePath -> Text -> ExceptT String IO Journal
parse _ = parseAndFinaliseJournal timeclockfilep
timeclockfilep :: ErroringJournalParser ParsedJournal
timeclockfilep = do many timeclockitemp
eof
j@Journal{jtxns=ts, jparsetimeclockentries=es} <- getState
-- Convert timeclock entries in this journal to transactions, closing any unfinished sessions.
-- Doing this here rather than in journalFinalise means timeclock sessions can't span file boundaries,
-- but it simplifies code above.
now <- liftIO getCurrentLocalTime
let j' = j{jtxns = ts ++ timeclockEntriesToTransactions now (reverse es), jparsetimeclockentries = []}
return j'
where
-- As all ledger line types can be distinguished by the first
-- character, excepting transactions versus empty (blank or
-- comment-only) lines, can use choice w/o try
timeclockitemp = choice [
void emptyorcommentlinep
, timeclockentryp >>= \e -> modifyState (\j -> j{jparsetimeclockentries = e : jparsetimeclockentries j})
] <?> "timeclock entry, or default year or historical price directive"
-- | Parse a timeclock entry.
timeclockentryp :: ErroringJournalParser TimeclockEntry
timeclockentryp = do
sourcepos <- genericSourcePos <$> getPosition
code <- oneOf "bhioO"
many1 spacenonewline
datetime <- datetimep
account <- fromMaybe "" <$> optionMaybe (many1 spacenonewline >> modifiedaccountnamep)
description <- fromMaybe "" <$> optionMaybe (many1 spacenonewline >> restofline)
return $ TimeclockEntry sourcepos (read [code]) datetime account description
tests_Hledger_Read_TimeclockReader = TestList [
]