hledger/hledger-lib/Hledger/Read/TimedotReader.hs
Simon Michael c89c33b36e lib: textification: parse stream
10% more allocation, but 35% lower maximum residency, and slightly quicker.

hledger -f data/100x100x10.journal stats
<<ghc: 39327768 bytes, 77 GCs, 196834/269496 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.010 elapsed), 0.020 MUT (0.092 elapsed), 0.014 GC (0.119 elapsed) :ghc>>
<<ghc: 42842136 bytes, 84 GCs, 194010/270912 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.009 elapsed), 0.016 MUT (0.029 elapsed), 0.012 GC (0.120 elapsed) :ghc>>

hledger -f data/1000x1000x10.journal stats
<<ghc: 314291440 bytes, 612 GCs, 2070776/6628048 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.000 elapsed), 0.128 MUT (0.144 elapsed), 0.059 GC (0.070 elapsed) :ghc>>
<<ghc: 349558872 bytes, 681 GCs, 1397597/4106384 avg/max bytes residency (7 samples), 11M in use, 0.000 INIT (0.004 elapsed), 0.124 MUT (0.133 elapsed), 0.047 GC (0.053 elapsed) :ghc>>

hledger -f data/10000x1000x10.journal stats
<<ghc: 3070026824 bytes, 5973 GCs, 12698030/62951784 avg/max bytes residency (10 samples), 124M in use, 0.000 INIT (0.002 elapsed), 1.268 MUT (1.354 elapsed), 0.514 GC (0.587 elapsed) :ghc>>
<<ghc: 3424013128 bytes, 6658 GCs, 11405501/41071624 avg/max bytes residency (11 samples), 111M in use, 0.000 INIT (0.001 elapsed), 1.343 MUT (1.406 elapsed), 0.511 GC (0.573 elapsed) :ghc>>

hledger -f data/100000x1000x10.journal stats
<<ghc: 30753387392 bytes, 59811 GCs, 117615462/666703600 avg/max bytes residency (14 samples), 1588M in use, 0.000 INIT (0.000 elapsed), 12.068 MUT (12.238 elapsed), 6.015 GC (7.190 elapsed) :ghc>>
<<ghc: 34306530696 bytes, 66727 GCs, 76806196/414629312 avg/max bytes residency (14 samples), 1009M in use, 0.000 INIT (0.010 elapsed), 14.357 MUT (16.370 elapsed), 5.298 GC (6.534 elapsed) :ghc>>
2016-05-24 19:00:57 -07:00

156 lines
4.2 KiB
Haskell

{-|
A reader for the "timedot" file format.
Example:
@
#DATE
#ACCT DOTS # Each dot represents 15m, spaces are ignored
# on 2/1, 1h was spent on FOSS haskell work, 0.25h on research, etc.
2/1
fos.haskell .... ..
biz.research .
inc.client1 .... .... .... .... .... ....
2/2
biz.research .
inc.client1 .... .... ..
@
-}
module Hledger.Read.TimedotReader (
-- * Reader
reader,
-- * Misc other exports
timedotfilep,
-- * Tests
tests_Hledger_Read_TimedotReader
)
where
import Prelude ()
import Prelude.Compat
import Control.Monad
import Control.Monad.Except (ExceptT)
import Data.Char (isSpace)
import Data.List (foldl')
import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as T
import Test.HUnit
import Text.Parsec hiding (parse)
import System.FilePath
import Hledger.Data
import Hledger.Read.Common
import Hledger.Utils hiding (ptrace)
-- easier to toggle this here sometimes
-- import qualified Hledger.Utils (ptrace)
-- ptrace = Hledger.Utils.ptrace
ptrace = return
reader :: Reader
reader = Reader format detect parse
format :: String
format = "timedot"
-- | Does the given file path and data look like it might contain this format ?
detect :: FilePath -> Text -> Bool
detect f t
| f /= "-" = takeExtension f == '.':format -- from a file: yes if the extension matches the format name
| otherwise = regexMatches "(^|\n)[0-9]" $ T.unpack t -- from stdin: yes if we can see a possible timedot day entry (digits in column 0)
-- | Parse and post-process a "Journal" from the timedot format, or give an error.
parse :: Maybe FilePath -> Bool -> FilePath -> Text -> ExceptT String IO Journal
parse _ = parseAndFinaliseJournal timedotfilep
timedotfilep :: ErroringJournalParser ParsedJournal
timedotfilep = do many timedotfileitemp
eof
getState
where
timedotfileitemp = do
ptrace "timedotfileitemp"
choice [
void emptyorcommentlinep
,timedotdayp >>= \ts -> modifyState (addTransactions ts)
] <?> "timedot day entry, or default year or comment line or blank line"
addTransactions :: [Transaction] -> Journal -> Journal
addTransactions ts j = foldl' (flip ($)) j (map addTransaction ts)
-- | Parse timedot day entries to zero or more time transactions for that day.
-- @
-- 2/1
-- fos.haskell .... ..
-- biz.research .
-- inc.client1 .... .... .... .... .... ....
-- @
timedotdayp :: ErroringJournalParser [Transaction]
timedotdayp = do
ptrace " timedotdayp"
d <- datep <* eolof
es <- catMaybes <$> many (const Nothing <$> try emptyorcommentlinep <|>
Just <$> (notFollowedBy datep >> timedotentryp))
return $ map (\t -> t{tdate=d}) es -- <$> many timedotentryp
-- | Parse a single timedot entry to one (dateless) transaction.
-- @
-- fos.haskell .... ..
-- @
timedotentryp :: ErroringJournalParser Transaction
timedotentryp = do
ptrace " timedotentryp"
pos <- genericSourcePos <$> getPosition
many spacenonewline
a <- modifiedaccountnamep
many spacenonewline
hours <-
try (followingcommentp >> return 0)
<|> (timedotdurationp <*
(try followingcommentp <|> (newline >> return "")))
let t = nulltransaction{
tsourcepos = pos,
tstatus = Cleared,
tpostings = [
nullposting{paccount=a
,pamount=Mixed [setAmountPrecision 2 $ num hours] -- don't assume hours; do set precision to 2
,ptype=VirtualPosting
,ptransaction=Just t
}
]
}
return t
timedotdurationp :: ErroringJournalParser Quantity
timedotdurationp = try timedotnumberp <|> timedotdotsp
-- | Parse a duration written as a decimal number of hours (optionally followed by the letter h).
-- @
-- 1.5h
-- @
timedotnumberp :: ErroringJournalParser Quantity
timedotnumberp = do
(q, _, _, _) <- numberp
many spacenonewline
optional $ char 'h'
many spacenonewline
return q
-- | Parse a quantity written as a line of dots, each representing 0.25.
-- @
-- .... ..
-- @
timedotdotsp :: ErroringJournalParser Quantity
timedotdotsp = do
dots <- filter (not.isSpace) <$> many (oneOf ". ")
return $ (/4) $ fromIntegral $ length dots
tests_Hledger_Read_TimedotReader = TestList [
]