From 90c9735b7a7f7680fc99579dd8445f516854ae2c Mon Sep 17 00:00:00 2001 From: Simon Michael Date: Tue, 24 May 2016 18:51:52 -0700 Subject: [PATCH] lib: textification: descriptions & codes Slightly higher (with small files) and lower (with large files) maximum residency, and slightly quicker for all. hledger -f data/100x100x10.journal stats <> <> hledger -f data/1000x1000x10.journal stats <> <> hledger -f data/10000x1000x10.journal stats <> <> hledger -f data/100000x1000x10.journal stats <> <> --- hledger-lib/Hledger/Data/Journal.hs | 2 +- hledger-lib/Hledger/Data/Timeclock.hs | 8 ++++---- hledger-lib/Hledger/Data/Transaction.hs | 8 ++++---- hledger-lib/Hledger/Data/Types.hs | 10 +++++----- hledger-lib/Hledger/Query.hs | 8 ++++---- hledger-lib/Hledger/Read/CsvReader.hs | 4 ++-- hledger-lib/Hledger/Read/JournalReader.hs | 8 ++++---- hledger-lib/Hledger/Read/TimeclockReader.hs | 2 +- hledger-lib/Hledger/Reports/PostingsReport.hs | 4 +++- hledger-ui/Hledger/UI/RegisterScreen.hs | 2 +- hledger-web/Handler/AddForm.hs | 2 +- hledger-web/Handler/JournalR.hs | 2 +- hledger-web/Handler/RegisterR.hs | 2 +- hledger/Hledger/Cli/Add.hs | 20 +++++++++---------- hledger/Hledger/Cli/Print.hs | 12 +++++------ hledger/Hledger/Cli/Register.hs | 2 +- 16 files changed, 49 insertions(+), 47 deletions(-) diff --git a/hledger-lib/Hledger/Data/Journal.hs b/hledger-lib/Hledger/Data/Journal.hs index 98382a5fa..69e784ae1 100644 --- a/hledger-lib/Hledger/Data/Journal.hs +++ b/hledger-lib/Hledger/Data/Journal.hs @@ -215,7 +215,7 @@ journalPrevTransaction :: Journal -> Transaction -> Maybe Transaction journalPrevTransaction j t = journalTransactionAt j (tindex t - 1) -- | Unique transaction descriptions used in this journal. -journalDescriptions :: Journal -> [String] +journalDescriptions :: Journal -> [Text] journalDescriptions = nub . sort . map tdescription . jtxns -- | All postings from this journal's transactions, in order. diff --git a/hledger-lib/Hledger/Data/Timeclock.hs b/hledger-lib/Hledger/Data/Timeclock.hs index ae4e6818c..ee7beb4e8 100644 --- a/hledger-lib/Hledger/Data/Timeclock.hs +++ b/hledger-lib/Hledger/Data/Timeclock.hs @@ -12,7 +12,7 @@ module Hledger.Data.Timeclock where import Data.Maybe -- import Data.Text (Text) --- import qualified Data.Text as T +import qualified Data.Text as T import Data.Time.Calendar import Data.Time.Clock import Data.Time.Format @@ -99,8 +99,8 @@ entryFromTimeclockInOut i o itod = localTimeOfDay itime otod = localTimeOfDay otime idate = localDay itime - desc | null (tldescription i) = showtime itod ++ "-" ++ showtime otod - | otherwise = tldescription i + desc | T.null (tldescription i) = T.pack $ showtime itod ++ "-" ++ showtime otod + | otherwise = tldescription i showtime = take 5 . show hours = elapsedSeconds (toutc otime) (toutc itime) / 3600 where toutc = localTimeToUTC utc acctname = tlaccount i @@ -125,7 +125,7 @@ tests_Hledger_Data_Timeclock = TestList [ parseTime defaultTimeLocale "%H:%M:%S" #endif showtime = formatTime defaultTimeLocale "%H:%M" - assertEntriesGiveStrings name es ss = assertEqual name ss (map tdescription $ timeclockEntriesToTransactions now es) + assertEntriesGiveStrings name es ss = assertEqual name ss (map (T.unpack . tdescription) $ timeclockEntriesToTransactions now es) assertEntriesGiveStrings "started yesterday, split session at midnight" [clockin (mktime yesterday "23:00:00") "" ""] diff --git a/hledger-lib/Hledger/Data/Transaction.hs b/hledger-lib/Hledger/Data/Transaction.hs index f28a2638e..a5102464d 100644 --- a/hledger-lib/Hledger/Data/Transaction.hs +++ b/hledger-lib/Hledger/Data/Transaction.hs @@ -58,10 +58,10 @@ import Hledger.Data.Amount instance Show Transaction where show = showTransactionUnelided instance Show ModifierTransaction where - show t = "= " ++ mtvalueexpr t ++ "\n" ++ unlines (map show (mtpostings t)) + show t = "= " ++ T.unpack (mtvalueexpr t) ++ "\n" ++ unlines (map show (mtpostings t)) instance Show PeriodicTransaction where - show t = "~ " ++ ptperiodicexpr t ++ "\n" ++ unlines (map show (ptpostings t)) + show t = "~ " ++ T.unpack (ptperiodicexpr t) ++ "\n" ++ unlines (map show (ptpostings t)) nullsourcepos :: GenericSourcePos nullsourcepos = GenericSourcePos "" 1 1 @@ -152,8 +152,8 @@ showTransactionHelper elide onelineamounts t = status | tstatus t == Cleared = " *" | tstatus t == Pending = " !" | otherwise = "" - code = if length (tcode t) > 0 then printf " (%s)" $ tcode t else "" - desc = if null d then "" else " " ++ d where d = tdescription t + code = if T.length (tcode t) > 0 then printf " (%s)" $ T.unpack $ tcode t else "" + desc = if null d then "" else " " ++ d where d = T.unpack $ tdescription t (samelinecomment, newlinecomments) = case renderCommentLines (tcomment t) of [] -> ("",[]) c:cs -> (c,cs) diff --git a/hledger-lib/Hledger/Data/Types.hs b/hledger-lib/Hledger/Data/Types.hs index 075656fcf..2e74b96ad 100644 --- a/hledger-lib/Hledger/Data/Types.hs +++ b/hledger-lib/Hledger/Data/Types.hs @@ -176,8 +176,8 @@ data Transaction = Transaction { tdate :: Day, tdate2 :: Maybe Day, tstatus :: ClearedStatus, - tcode :: String, - tdescription :: String, + tcode :: Text, + tdescription :: Text, tcomment :: Text, -- ^ this transaction's comment lines, as a single non-indented multi-line string ttags :: [Tag], -- ^ tag names and values, extracted from the comment tpostings :: [Posting], -- ^ this transaction's postings @@ -187,14 +187,14 @@ data Transaction = Transaction { instance NFData Transaction data ModifierTransaction = ModifierTransaction { - mtvalueexpr :: String, + mtvalueexpr :: Text, mtpostings :: [Posting] } deriving (Eq,Typeable,Data,Generic) instance NFData ModifierTransaction data PeriodicTransaction = PeriodicTransaction { - ptperiodicexpr :: String, + ptperiodicexpr :: Text, ptpostings :: [Posting] } deriving (Eq,Typeable,Data,Generic) @@ -209,7 +209,7 @@ data TimeclockEntry = TimeclockEntry { tlcode :: TimeclockCode, tldatetime :: LocalTime, tlaccount :: AccountName, - tldescription :: String + tldescription :: Text } deriving (Eq,Ord,Typeable,Data,Generic) instance NFData TimeclockEntry diff --git a/hledger-lib/Hledger/Query.hs b/hledger-lib/Hledger/Query.hs index 791b7c79e..29a804254 100644 --- a/hledger-lib/Hledger/Query.hs +++ b/hledger-lib/Hledger/Query.hs @@ -635,8 +635,8 @@ matchesPosting (Any) _ = True matchesPosting (None) _ = False matchesPosting (Or qs) p = any (`matchesPosting` p) qs matchesPosting (And qs) p = all (`matchesPosting` p) qs -matchesPosting (Code r) p = regexMatchesCI r $ maybe "" tcode $ ptransaction p -matchesPosting (Desc r) p = regexMatchesCI r $ maybe "" tdescription $ ptransaction p +matchesPosting (Code r) p = regexMatchesCI r $ maybe "" (T.unpack . tcode) $ ptransaction p +matchesPosting (Desc r) p = regexMatchesCI r $ maybe "" (T.unpack . tdescription) $ ptransaction p matchesPosting (Acct r) p = regexMatchesCI r $ T.unpack $ paccount p -- XXX pack matchesPosting (Date span) p = span `spanContainsDate` postingDate p matchesPosting (Date2 span) p = span `spanContainsDate` postingDate2 p @@ -693,8 +693,8 @@ matchesTransaction (Any) _ = True matchesTransaction (None) _ = False matchesTransaction (Or qs) t = any (`matchesTransaction` t) qs matchesTransaction (And qs) t = all (`matchesTransaction` t) qs -matchesTransaction (Code r) t = regexMatchesCI r $ tcode t -matchesTransaction (Desc r) t = regexMatchesCI r $ tdescription t +matchesTransaction (Code r) t = regexMatchesCI r $ T.unpack $ tcode t +matchesTransaction (Desc r) t = regexMatchesCI r $ T.unpack $ tdescription t matchesTransaction q@(Acct _) t = any (q `matchesPosting`) $ tpostings t matchesTransaction (Date span) t = spanContainsDate span $ tdate t matchesTransaction (Date2 span) t = spanContainsDate span $ transactionDate2 t diff --git a/hledger-lib/Hledger/Read/CsvReader.hs b/hledger-lib/Hledger/Read/CsvReader.hs index b844d2e42..7e3805901 100644 --- a/hledger-lib/Hledger/Read/CsvReader.hs +++ b/hledger-lib/Hledger/Read/CsvReader.hs @@ -649,8 +649,8 @@ transactionFromCsvRecord sourcepos rules record = t tdate = date', tdate2 = mdate2', tstatus = status, - tcode = code, - tdescription = description, + tcode = T.pack code, + tdescription = T.pack description, tcomment = T.pack comment, tpreceding_comment_lines = T.pack precomment, tpostings = diff --git a/hledger-lib/Hledger/Read/JournalReader.hs b/hledger-lib/Hledger/Read/JournalReader.hs index af82b5181..60847f038 100644 --- a/hledger-lib/Hledger/Read/JournalReader.hs +++ b/hledger-lib/Hledger/Read/JournalReader.hs @@ -411,7 +411,7 @@ modifiertransactionp :: ErroringJournalParser ModifierTransaction modifiertransactionp = do char '=' "modifier transaction" many spacenonewline - valueexpr <- restofline + valueexpr <- T.pack <$> restofline postings <- postingsp Nothing return $ ModifierTransaction valueexpr postings @@ -419,7 +419,7 @@ periodictransactionp :: ErroringJournalParser PeriodicTransaction periodictransactionp = do char '~' "periodic transaction" many spacenonewline - periodexpr <- restofline + periodexpr <- T.pack <$> restofline postings <- postingsp Nothing return $ PeriodicTransaction periodexpr postings @@ -432,8 +432,8 @@ transactionp = do edate <- optionMaybe (secondarydatep date) "secondary date" lookAhead (spacenonewline <|> newline) "whitespace or newline" status <- statusp "cleared status" - code <- codep "transaction code" - description <- strip <$> descriptionp + code <- T.pack <$> codep "transaction code" + description <- T.pack . strip <$> descriptionp comment <- try followingcommentp <|> (newline >> return "") let tags = commentTags comment postings <- postingsp (Just date) diff --git a/hledger-lib/Hledger/Read/TimeclockReader.hs b/hledger-lib/Hledger/Read/TimeclockReader.hs index f6315afc7..9f128b53f 100644 --- a/hledger-lib/Hledger/Read/TimeclockReader.hs +++ b/hledger-lib/Hledger/Read/TimeclockReader.hs @@ -114,7 +114,7 @@ timeclockentryp = do many1 spacenonewline datetime <- datetimep account <- fromMaybe "" <$> optionMaybe (many1 spacenonewline >> modifiedaccountnamep) - description <- fromMaybe "" <$> optionMaybe (many1 spacenonewline >> restofline) + description <- T.pack . fromMaybe "" <$> optionMaybe (many1 spacenonewline >> restofline) return $ TimeclockEntry sourcepos (read [code]) datetime account description tests_Hledger_Read_TimeclockReader = TestList [ diff --git a/hledger-lib/Hledger/Reports/PostingsReport.hs b/hledger-lib/Hledger/Reports/PostingsReport.hs index faa48316c..40277784b 100644 --- a/hledger-lib/Hledger/Reports/PostingsReport.hs +++ b/hledger-lib/Hledger/Reports/PostingsReport.hs @@ -19,6 +19,8 @@ where import Data.List import Data.Maybe import Data.Ord (comparing) +-- import Data.Text (Text) +import qualified Data.Text as T import Data.Time.Calendar import Safe (headMay, lastMay) import Test.HUnit @@ -154,7 +156,7 @@ mkpostingsReportItem showdate showdesc wd menddate p b = where date = case wd of PrimaryDate -> postingDate p SecondaryDate -> postingDate2 p - desc = maybe "" tdescription $ ptransaction p + desc = T.unpack $ maybe "" tdescription $ ptransaction p -- | Convert a list of postings into summary postings, one per interval, -- aggregated to the specified depth if any. diff --git a/hledger-ui/Hledger/UI/RegisterScreen.hs b/hledger-ui/Hledger/UI/RegisterScreen.hs index f789e15e5..a94f3adb0 100644 --- a/hledger-ui/Hledger/UI/RegisterScreen.hs +++ b/hledger-ui/Hledger/UI/RegisterScreen.hs @@ -65,7 +65,7 @@ initRegisterScreen d st@AppState{aopts=opts, ajournal=j, aScreen=s@RegisterScree -- pre-render all items; these will be the List elements. This helps calculate column widths. displayitem (t, _, _issplit, otheracctsstr, change, bal) = (showDate $ tdate t - ,tdescription t + ,T.unpack $ tdescription t ,case splitOn ", " otheracctsstr of [s] -> s ss -> intercalate ", " ss diff --git a/hledger-web/Handler/AddForm.hs b/hledger-web/Handler/AddForm.hs index 58a4adcd9..71d31fb29 100644 --- a/hledger-web/Handler/AddForm.hs +++ b/hledger-web/Handler/AddForm.hs @@ -106,7 +106,7 @@ postAddForm = do | otherwise = either (\e -> Left [L.head $ lines e]) Right (balanceTransaction Nothing $ nulltransaction { tdate=date - ,tdescription=desc + ,tdescription=T.pack desc ,tpostings=[nullposting{paccount=acct, pamount=Mixed [amt]} | (acct,amt) <- zip accts amts] }) case etxn of diff --git a/hledger-web/Handler/JournalR.hs b/hledger-web/Handler/JournalR.hs index 69a5b94bd..179bdacc0 100644 --- a/hledger-web/Handler/JournalR.hs +++ b/hledger-web/Handler/JournalR.hs @@ -64,7 +64,7 @@ journalTransactionsReportAsHtml _ vd (_,items) = [hamlet| #{date} - #{elideRight 60 desc} + #{textElideRight 60 desc} $if showamt \#{mixedAmountAsHtml amt} diff --git a/hledger-web/Handler/RegisterR.hs b/hledger-web/Handler/RegisterR.hs index 187a0c7b3..634463b0c 100644 --- a/hledger-web/Handler/RegisterR.hs +++ b/hledger-web/Handler/RegisterR.hs @@ -78,7 +78,7 @@ registerItemsHtml _ vd (balancelabel,items) = [hamlet| #{date} - #{elideRight 30 desc} + #{textElideRight 30 desc} #{elideRight 40 acct} $if showamt diff --git a/hledger/Hledger/Cli/Add.hs b/hledger/Hledger/Cli/Add.hs index a966cd1f6..c590a33ab 100644 --- a/hledger/Hledger/Cli/Add.hs +++ b/hledger/Hledger/Cli/Add.hs @@ -162,7 +162,7 @@ transactionWizard es@EntryState{..} = do balancedPostingsWizard -- Identify the closest recent match for this description in past transactions. -similarTransaction :: EntryState -> String -> Maybe Transaction +similarTransaction :: EntryState -> Text -> Maybe Transaction similarTransaction EntryState{..} desc = let q = queryFromOptsOnly esToday $ reportopts_ esOpts historymatches = transactionsSimilarTo esJournal q desc @@ -184,13 +184,13 @@ dateAndCodeWizard EntryState{..} = do parseSmartDateAndCode refdate s = either (const Nothing) (\(d,c) -> return (fixSmartDate refdate d, c)) edc where edc = runParser (dateandcodep <* eof) mempty "" $ T.pack $ lowercase s - dateandcodep :: Monad m => JournalParser m (SmartDate, String) + dateandcodep :: Monad m => JournalParser m (SmartDate, Text) dateandcodep = do d <- smartdate c <- optionMaybe codep many spacenonewline eof - return (d, fromMaybe "" c) + return (d, T.pack $ fromMaybe "" c) -- defday = fixSmartDate today $ fromparse $ (parse smartdate "" . lowercase) defdate -- datestr = showDate $ fixSmartDate defday smtdate @@ -200,8 +200,8 @@ descriptionAndCommentWizard EntryState{..} = do defaultTo' def $ nonEmpty $ maybeRestartTransaction $ line $ green $ printf "Description%s: " (showDefault def) - let (desc,comment) = (strip a, strip $ dropWhile (==';') b) where (a,b) = break (==';') s - return (desc, T.pack comment) + let (desc,comment) = (T.pack $ strip a, T.pack $ strip $ dropWhile (==';') b) where (a,b) = break (==';') s + return (desc, comment) postingsWizard es@EntryState{..} = do mp <- postingWizard es @@ -319,7 +319,7 @@ dateCompleter :: String -> CompletionFunc IO dateCompleter = completer ["today","tomorrow","yesterday"] descriptionCompleter :: Journal -> String -> CompletionFunc IO -descriptionCompleter j = completer (journalDescriptions j) +descriptionCompleter j = completer (map T.unpack $ journalDescriptions j) accountCompleter :: Journal -> String -> CompletionFunc IO accountCompleter j = completer (map T.unpack $ journalAccountNamesUsed j) @@ -396,7 +396,7 @@ capitalize (c:cs) = toUpper c : cs -- | Find the most similar and recent transactions matching the given -- transaction description and report query. Transactions are listed -- with their "relevancy" score, most relevant first. -transactionsSimilarTo :: Journal -> Query -> String -> [(Double,Transaction)] +transactionsSimilarTo :: Journal -> Query -> Text -> [(Double,Transaction)] transactionsSimilarTo j q desc = sortBy compareRelevanceAndRecency $ filter ((> threshold).fst) @@ -410,10 +410,10 @@ transactionsSimilarTo j q desc = -- descriptions. This is like compareStrings, but first strips out -- any numbers, to improve accuracy eg when there are bank transaction -- ids from imported data. -compareDescriptions :: String -> String -> Double +compareDescriptions :: Text -> Text -> Double compareDescriptions s t = compareStrings s' t' - where s' = simplify s - t' = simplify t + where s' = simplify $ T.unpack s + t' = simplify $ T.unpack t simplify = filter (not . (`elem` ("0123456789" :: String))) -- | Return a similarity measure, from 0 to 1, for two strings. This diff --git a/hledger/Hledger/Cli/Print.hs b/hledger/Hledger/Cli/Print.hs index dc821d7dc..92d462075 100644 --- a/hledger/Hledger/Cli/Print.hs +++ b/hledger/Hledger/Cli/Print.hs @@ -15,7 +15,7 @@ module Hledger.Cli.Print ( where import Data.List --- import Data.Text (Text) +import Data.Text (Text) import qualified Data.Text as T import System.Console.CmdArgs.Explicit import Test.HUnit @@ -49,7 +49,7 @@ print' :: CliOpts -> Journal -> IO () print' opts j = do case maybestringopt "match" $ rawopts_ opts of Nothing -> printEntries opts j - Just desc -> printMatch opts j desc + Just desc -> printMatch opts j $ T.pack desc printEntries :: CliOpts -> Journal -> IO () printEntries opts@CliOpts{reportopts_=ropts} j = do @@ -116,11 +116,11 @@ transactionToCSV n t = map (\p -> show n:date:date2:status:code:description:comment:p) (concatMap postingToCSV $ tpostings t) where - description = tdescription t + description = T.unpack $ tdescription t date = showDate (tdate t) date2 = maybe "" showDate (tdate2 t) status = show $ tstatus t - code = tcode t + code = T.unpack $ tcode t comment = chomp $ strip $ T.unpack $ tcomment t postingToCSV :: Posting -> CSV @@ -143,7 +143,7 @@ postingToCSV p = -- | Print the transaction most closely and recently matching a description -- (and the query, if any). -printMatch :: CliOpts -> Journal -> String -> IO () +printMatch :: CliOpts -> Journal -> Text -> IO () printMatch CliOpts{reportopts_=ropts} j desc = do d <- getCurrentDay let q = queryFromOpts d ropts @@ -153,7 +153,7 @@ printMatch CliOpts{reportopts_=ropts} j desc = do where -- Identify the closest recent match for this description in past transactions. - similarTransaction' :: Journal -> Query -> String -> Maybe Transaction + similarTransaction' :: Journal -> Query -> Text -> Maybe Transaction similarTransaction' j q desc | null historymatches = Nothing | otherwise = Just $ snd $ head historymatches diff --git a/hledger/Hledger/Cli/Register.hs b/hledger/Hledger/Cli/Register.hs index 3a1d37272..7368f70a3 100644 --- a/hledger/Hledger/Cli/Register.hs +++ b/hledger/Hledger/Cli/Register.hs @@ -72,7 +72,7 @@ postingsReportItemAsCsvRecord :: PostingsReportItem -> Record postingsReportItemAsCsvRecord (_, _, _, p, b) = [date,desc,acct,amt,bal] where date = showDate $ postingDate p -- XXX csv should show date2 with --date2 - desc = maybe "" tdescription $ ptransaction p + desc = T.unpack $ maybe "" tdescription $ ptransaction p acct = bracket $ T.unpack $ paccount p where bracket = case ptype p of