lib: textification: descriptions & codes

Slightly higher (with small files) and lower (with large files) maximum
residency, and slightly quicker for all.

hledger -f data/100x100x10.journal stats
<<ghc: 42858472 bytes, 84 GCs, 193712/269608 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.015 elapsed), 0.016 MUT (0.042 elapsed), 0.011 GC (0.119 elapsed) :ghc>>
<<ghc: 42891776 bytes, 84 GCs, 190816/260920 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.004 elapsed), 0.017 MUT (0.025 elapsed), 0.010 GC (0.015 elapsed) :ghc>>

hledger -f data/1000x1000x10.journal stats
<<ghc: 349575240 bytes, 681 GCs, 1396425/4091680 avg/max bytes residency (7 samples), 11M in use, 0.000 INIT (0.000 elapsed), 0.137 MUT (0.146 elapsed), 0.050 GC (0.057 elapsed) :ghc>>
<<ghc: 349927568 bytes, 681 GCs, 1397825/4097248 avg/max bytes residency (7 samples), 11M in use, 0.000 INIT (0.000 elapsed), 0.126 MUT (0.133 elapsed), 0.050 GC (0.057 elapsed) :ghc>>

hledger -f data/10000x1000x10.journal stats
<<ghc: 3424029496 bytes, 6658 GCs, 11403141/41077288 avg/max bytes residency (11 samples), 111M in use, 0.000 INIT (0.000 elapsed), 1.278 MUT (1.310 elapsed), 0.493 GC (0.546 elapsed) :ghc>>
<<ghc: 3427418064 bytes, 6665 GCs, 11127869/37790168 avg/max bytes residency (11 samples), 109M in use, 0.000 INIT (0.001 elapsed), 1.212 MUT (1.229 elapsed), 0.466 GC (0.519 elapsed) :ghc>>

hledger -f data/100000x1000x10.journal stats
<<ghc: 34306546248 bytes, 66727 GCs, 77030638/414617944 avg/max bytes residency (14 samples), 1012M in use, 0.000 INIT (0.000 elapsed), 12.965 MUT (13.164 elapsed), 4.771 GC (5.447 elapsed) :ghc>>
<<ghc: 34340246056 bytes, 66779 GCs, 76983178/416011480 avg/max bytes residency (14 samples), 1011M in use, 0.000 INIT (0.008 elapsed), 12.666 MUT (12.836 elapsed), 4.595 GC (5.175 elapsed) :ghc>>
This commit is contained in:
Simon Michael 2016-05-24 18:51:52 -07:00
parent a1b68009da
commit 90c9735b7a
16 changed files with 49 additions and 47 deletions

View File

@ -215,7 +215,7 @@ journalPrevTransaction :: Journal -> Transaction -> Maybe Transaction
journalPrevTransaction j t = journalTransactionAt j (tindex t - 1)
-- | Unique transaction descriptions used in this journal.
journalDescriptions :: Journal -> [String]
journalDescriptions :: Journal -> [Text]
journalDescriptions = nub . sort . map tdescription . jtxns
-- | All postings from this journal's transactions, in order.

View File

@ -12,7 +12,7 @@ module Hledger.Data.Timeclock
where
import Data.Maybe
-- import Data.Text (Text)
-- import qualified Data.Text as T
import qualified Data.Text as T
import Data.Time.Calendar
import Data.Time.Clock
import Data.Time.Format
@ -99,8 +99,8 @@ entryFromTimeclockInOut i o
itod = localTimeOfDay itime
otod = localTimeOfDay otime
idate = localDay itime
desc | null (tldescription i) = showtime itod ++ "-" ++ showtime otod
| otherwise = tldescription i
desc | T.null (tldescription i) = T.pack $ showtime itod ++ "-" ++ showtime otod
| otherwise = tldescription i
showtime = take 5 . show
hours = elapsedSeconds (toutc otime) (toutc itime) / 3600 where toutc = localTimeToUTC utc
acctname = tlaccount i
@ -125,7 +125,7 @@ tests_Hledger_Data_Timeclock = TestList [
parseTime defaultTimeLocale "%H:%M:%S"
#endif
showtime = formatTime defaultTimeLocale "%H:%M"
assertEntriesGiveStrings name es ss = assertEqual name ss (map tdescription $ timeclockEntriesToTransactions now es)
assertEntriesGiveStrings name es ss = assertEqual name ss (map (T.unpack . tdescription) $ timeclockEntriesToTransactions now es)
assertEntriesGiveStrings "started yesterday, split session at midnight"
[clockin (mktime yesterday "23:00:00") "" ""]

View File

@ -58,10 +58,10 @@ import Hledger.Data.Amount
instance Show Transaction where show = showTransactionUnelided
instance Show ModifierTransaction where
show t = "= " ++ mtvalueexpr t ++ "\n" ++ unlines (map show (mtpostings t))
show t = "= " ++ T.unpack (mtvalueexpr t) ++ "\n" ++ unlines (map show (mtpostings t))
instance Show PeriodicTransaction where
show t = "~ " ++ ptperiodicexpr t ++ "\n" ++ unlines (map show (ptpostings t))
show t = "~ " ++ T.unpack (ptperiodicexpr t) ++ "\n" ++ unlines (map show (ptpostings t))
nullsourcepos :: GenericSourcePos
nullsourcepos = GenericSourcePos "" 1 1
@ -152,8 +152,8 @@ showTransactionHelper elide onelineamounts t =
status | tstatus t == Cleared = " *"
| tstatus t == Pending = " !"
| otherwise = ""
code = if length (tcode t) > 0 then printf " (%s)" $ tcode t else ""
desc = if null d then "" else " " ++ d where d = tdescription t
code = if T.length (tcode t) > 0 then printf " (%s)" $ T.unpack $ tcode t else ""
desc = if null d then "" else " " ++ d where d = T.unpack $ tdescription t
(samelinecomment, newlinecomments) =
case renderCommentLines (tcomment t) of [] -> ("",[])
c:cs -> (c,cs)

View File

@ -176,8 +176,8 @@ data Transaction = Transaction {
tdate :: Day,
tdate2 :: Maybe Day,
tstatus :: ClearedStatus,
tcode :: String,
tdescription :: String,
tcode :: Text,
tdescription :: Text,
tcomment :: Text, -- ^ this transaction's comment lines, as a single non-indented multi-line string
ttags :: [Tag], -- ^ tag names and values, extracted from the comment
tpostings :: [Posting], -- ^ this transaction's postings
@ -187,14 +187,14 @@ data Transaction = Transaction {
instance NFData Transaction
data ModifierTransaction = ModifierTransaction {
mtvalueexpr :: String,
mtvalueexpr :: Text,
mtpostings :: [Posting]
} deriving (Eq,Typeable,Data,Generic)
instance NFData ModifierTransaction
data PeriodicTransaction = PeriodicTransaction {
ptperiodicexpr :: String,
ptperiodicexpr :: Text,
ptpostings :: [Posting]
} deriving (Eq,Typeable,Data,Generic)
@ -209,7 +209,7 @@ data TimeclockEntry = TimeclockEntry {
tlcode :: TimeclockCode,
tldatetime :: LocalTime,
tlaccount :: AccountName,
tldescription :: String
tldescription :: Text
} deriving (Eq,Ord,Typeable,Data,Generic)
instance NFData TimeclockEntry

View File

@ -635,8 +635,8 @@ matchesPosting (Any) _ = True
matchesPosting (None) _ = False
matchesPosting (Or qs) p = any (`matchesPosting` p) qs
matchesPosting (And qs) p = all (`matchesPosting` p) qs
matchesPosting (Code r) p = regexMatchesCI r $ maybe "" tcode $ ptransaction p
matchesPosting (Desc r) p = regexMatchesCI r $ maybe "" tdescription $ ptransaction p
matchesPosting (Code r) p = regexMatchesCI r $ maybe "" (T.unpack . tcode) $ ptransaction p
matchesPosting (Desc r) p = regexMatchesCI r $ maybe "" (T.unpack . tdescription) $ ptransaction p
matchesPosting (Acct r) p = regexMatchesCI r $ T.unpack $ paccount p -- XXX pack
matchesPosting (Date span) p = span `spanContainsDate` postingDate p
matchesPosting (Date2 span) p = span `spanContainsDate` postingDate2 p
@ -693,8 +693,8 @@ matchesTransaction (Any) _ = True
matchesTransaction (None) _ = False
matchesTransaction (Or qs) t = any (`matchesTransaction` t) qs
matchesTransaction (And qs) t = all (`matchesTransaction` t) qs
matchesTransaction (Code r) t = regexMatchesCI r $ tcode t
matchesTransaction (Desc r) t = regexMatchesCI r $ tdescription t
matchesTransaction (Code r) t = regexMatchesCI r $ T.unpack $ tcode t
matchesTransaction (Desc r) t = regexMatchesCI r $ T.unpack $ tdescription t
matchesTransaction q@(Acct _) t = any (q `matchesPosting`) $ tpostings t
matchesTransaction (Date span) t = spanContainsDate span $ tdate t
matchesTransaction (Date2 span) t = spanContainsDate span $ transactionDate2 t

View File

@ -649,8 +649,8 @@ transactionFromCsvRecord sourcepos rules record = t
tdate = date',
tdate2 = mdate2',
tstatus = status,
tcode = code,
tdescription = description,
tcode = T.pack code,
tdescription = T.pack description,
tcomment = T.pack comment,
tpreceding_comment_lines = T.pack precomment,
tpostings =

View File

@ -411,7 +411,7 @@ modifiertransactionp :: ErroringJournalParser ModifierTransaction
modifiertransactionp = do
char '=' <?> "modifier transaction"
many spacenonewline
valueexpr <- restofline
valueexpr <- T.pack <$> restofline
postings <- postingsp Nothing
return $ ModifierTransaction valueexpr postings
@ -419,7 +419,7 @@ periodictransactionp :: ErroringJournalParser PeriodicTransaction
periodictransactionp = do
char '~' <?> "periodic transaction"
many spacenonewline
periodexpr <- restofline
periodexpr <- T.pack <$> restofline
postings <- postingsp Nothing
return $ PeriodicTransaction periodexpr postings
@ -432,8 +432,8 @@ transactionp = do
edate <- optionMaybe (secondarydatep date) <?> "secondary date"
lookAhead (spacenonewline <|> newline) <?> "whitespace or newline"
status <- statusp <?> "cleared status"
code <- codep <?> "transaction code"
description <- strip <$> descriptionp
code <- T.pack <$> codep <?> "transaction code"
description <- T.pack . strip <$> descriptionp
comment <- try followingcommentp <|> (newline >> return "")
let tags = commentTags comment
postings <- postingsp (Just date)

View File

@ -114,7 +114,7 @@ timeclockentryp = do
many1 spacenonewline
datetime <- datetimep
account <- fromMaybe "" <$> optionMaybe (many1 spacenonewline >> modifiedaccountnamep)
description <- fromMaybe "" <$> optionMaybe (many1 spacenonewline >> restofline)
description <- T.pack . fromMaybe "" <$> optionMaybe (many1 spacenonewline >> restofline)
return $ TimeclockEntry sourcepos (read [code]) datetime account description
tests_Hledger_Read_TimeclockReader = TestList [

View File

@ -19,6 +19,8 @@ where
import Data.List
import Data.Maybe
import Data.Ord (comparing)
-- import Data.Text (Text)
import qualified Data.Text as T
import Data.Time.Calendar
import Safe (headMay, lastMay)
import Test.HUnit
@ -154,7 +156,7 @@ mkpostingsReportItem showdate showdesc wd menddate p b =
where
date = case wd of PrimaryDate -> postingDate p
SecondaryDate -> postingDate2 p
desc = maybe "" tdescription $ ptransaction p
desc = T.unpack $ maybe "" tdescription $ ptransaction p
-- | Convert a list of postings into summary postings, one per interval,
-- aggregated to the specified depth if any.

View File

@ -65,7 +65,7 @@ initRegisterScreen d st@AppState{aopts=opts, ajournal=j, aScreen=s@RegisterScree
-- pre-render all items; these will be the List elements. This helps calculate column widths.
displayitem (t, _, _issplit, otheracctsstr, change, bal) =
(showDate $ tdate t
,tdescription t
,T.unpack $ tdescription t
,case splitOn ", " otheracctsstr of
[s] -> s
ss -> intercalate ", " ss

View File

@ -106,7 +106,7 @@ postAddForm = do
| otherwise = either (\e -> Left [L.head $ lines e]) Right
(balanceTransaction Nothing $ nulltransaction {
tdate=date
,tdescription=desc
,tdescription=T.pack desc
,tpostings=[nullposting{paccount=acct, pamount=Mixed [amt]} | (acct,amt) <- zip accts amts]
})
case etxn of

View File

@ -64,7 +64,7 @@ journalTransactionsReportAsHtml _ vd (_,items) = [hamlet|
<tbody ##{tindex torig}>
<tr .item.#{evenodd}.#{firstposting} style="vertical-align:top;" title="#{show torig}">
<td.date>#{date}
<td.description colspan=2>#{elideRight 60 desc}
<td.description colspan=2>#{textElideRight 60 desc}
<td.amount style="text-align:right;">
$if showamt
\#{mixedAmountAsHtml amt}

View File

@ -78,7 +78,7 @@ registerItemsHtml _ vd (balancelabel,items) = [hamlet|
<tr ##{tindex torig} .item.#{evenodd}.#{firstposting}.#{datetransition} title="#{show torig}" style="vertical-align:top;">
<td.date>
<a href="@{JournalR}##{tindex torig}">#{date}
<td.description title="#{show torig}">#{elideRight 30 desc}
<td.description title="#{show torig}">#{textElideRight 30 desc}
<td.account>#{elideRight 40 acct}
<td.amount style="text-align:right; white-space:nowrap;">
$if showamt

View File

@ -162,7 +162,7 @@ transactionWizard es@EntryState{..} = do
balancedPostingsWizard
-- Identify the closest recent match for this description in past transactions.
similarTransaction :: EntryState -> String -> Maybe Transaction
similarTransaction :: EntryState -> Text -> Maybe Transaction
similarTransaction EntryState{..} desc =
let q = queryFromOptsOnly esToday $ reportopts_ esOpts
historymatches = transactionsSimilarTo esJournal q desc
@ -184,13 +184,13 @@ dateAndCodeWizard EntryState{..} = do
parseSmartDateAndCode refdate s = either (const Nothing) (\(d,c) -> return (fixSmartDate refdate d, c)) edc
where
edc = runParser (dateandcodep <* eof) mempty "" $ T.pack $ lowercase s
dateandcodep :: Monad m => JournalParser m (SmartDate, String)
dateandcodep :: Monad m => JournalParser m (SmartDate, Text)
dateandcodep = do
d <- smartdate
c <- optionMaybe codep
many spacenonewline
eof
return (d, fromMaybe "" c)
return (d, T.pack $ fromMaybe "" c)
-- defday = fixSmartDate today $ fromparse $ (parse smartdate "" . lowercase) defdate
-- datestr = showDate $ fixSmartDate defday smtdate
@ -200,8 +200,8 @@ descriptionAndCommentWizard EntryState{..} = do
defaultTo' def $ nonEmpty $
maybeRestartTransaction $
line $ green $ printf "Description%s: " (showDefault def)
let (desc,comment) = (strip a, strip $ dropWhile (==';') b) where (a,b) = break (==';') s
return (desc, T.pack comment)
let (desc,comment) = (T.pack $ strip a, T.pack $ strip $ dropWhile (==';') b) where (a,b) = break (==';') s
return (desc, comment)
postingsWizard es@EntryState{..} = do
mp <- postingWizard es
@ -319,7 +319,7 @@ dateCompleter :: String -> CompletionFunc IO
dateCompleter = completer ["today","tomorrow","yesterday"]
descriptionCompleter :: Journal -> String -> CompletionFunc IO
descriptionCompleter j = completer (journalDescriptions j)
descriptionCompleter j = completer (map T.unpack $ journalDescriptions j)
accountCompleter :: Journal -> String -> CompletionFunc IO
accountCompleter j = completer (map T.unpack $ journalAccountNamesUsed j)
@ -396,7 +396,7 @@ capitalize (c:cs) = toUpper c : cs
-- | Find the most similar and recent transactions matching the given
-- transaction description and report query. Transactions are listed
-- with their "relevancy" score, most relevant first.
transactionsSimilarTo :: Journal -> Query -> String -> [(Double,Transaction)]
transactionsSimilarTo :: Journal -> Query -> Text -> [(Double,Transaction)]
transactionsSimilarTo j q desc =
sortBy compareRelevanceAndRecency
$ filter ((> threshold).fst)
@ -410,10 +410,10 @@ transactionsSimilarTo j q desc =
-- descriptions. This is like compareStrings, but first strips out
-- any numbers, to improve accuracy eg when there are bank transaction
-- ids from imported data.
compareDescriptions :: String -> String -> Double
compareDescriptions :: Text -> Text -> Double
compareDescriptions s t = compareStrings s' t'
where s' = simplify s
t' = simplify t
where s' = simplify $ T.unpack s
t' = simplify $ T.unpack t
simplify = filter (not . (`elem` ("0123456789" :: String)))
-- | Return a similarity measure, from 0 to 1, for two strings. This

View File

@ -15,7 +15,7 @@ module Hledger.Cli.Print (
where
import Data.List
-- import Data.Text (Text)
import Data.Text (Text)
import qualified Data.Text as T
import System.Console.CmdArgs.Explicit
import Test.HUnit
@ -49,7 +49,7 @@ print' :: CliOpts -> Journal -> IO ()
print' opts j = do
case maybestringopt "match" $ rawopts_ opts of
Nothing -> printEntries opts j
Just desc -> printMatch opts j desc
Just desc -> printMatch opts j $ T.pack desc
printEntries :: CliOpts -> Journal -> IO ()
printEntries opts@CliOpts{reportopts_=ropts} j = do
@ -116,11 +116,11 @@ transactionToCSV n t =
map (\p -> show n:date:date2:status:code:description:comment:p)
(concatMap postingToCSV $ tpostings t)
where
description = tdescription t
description = T.unpack $ tdescription t
date = showDate (tdate t)
date2 = maybe "" showDate (tdate2 t)
status = show $ tstatus t
code = tcode t
code = T.unpack $ tcode t
comment = chomp $ strip $ T.unpack $ tcomment t
postingToCSV :: Posting -> CSV
@ -143,7 +143,7 @@ postingToCSV p =
-- | Print the transaction most closely and recently matching a description
-- (and the query, if any).
printMatch :: CliOpts -> Journal -> String -> IO ()
printMatch :: CliOpts -> Journal -> Text -> IO ()
printMatch CliOpts{reportopts_=ropts} j desc = do
d <- getCurrentDay
let q = queryFromOpts d ropts
@ -153,7 +153,7 @@ printMatch CliOpts{reportopts_=ropts} j desc = do
where
-- Identify the closest recent match for this description in past transactions.
similarTransaction' :: Journal -> Query -> String -> Maybe Transaction
similarTransaction' :: Journal -> Query -> Text -> Maybe Transaction
similarTransaction' j q desc
| null historymatches = Nothing
| otherwise = Just $ snd $ head historymatches

View File

@ -72,7 +72,7 @@ postingsReportItemAsCsvRecord :: PostingsReportItem -> Record
postingsReportItemAsCsvRecord (_, _, _, p, b) = [date,desc,acct,amt,bal]
where
date = showDate $ postingDate p -- XXX csv should show date2 with --date2
desc = maybe "" tdescription $ ptransaction p
desc = T.unpack $ maybe "" tdescription $ ptransaction p
acct = bracket $ T.unpack $ paccount p
where
bracket = case ptype p of