From f78dc639a59be6466e6da84591258c8011e2ece2 Mon Sep 17 00:00:00 2001 From: Simon Michael Date: Thu, 10 Sep 2020 17:46:16 -0700 Subject: [PATCH] fix a slowdown with report rendering in 1.19.1 (#1350) stripAnsi is called many times during rendering (by strWidth), so should be fast. It was originally a regex replacement, and more recently a custom parser. The parser was slower, particularly the one in 1.19.1. See #1350, and this rough test: time118ish = timeIt $ print $ length $ concat $ map (fromRight undefined . regexReplace (toRegex' "\ESC\\[([0-9]+;)*([0-9]+)?[ABCDHJKfmsu]") "") testdata time119 = timeparser (many (takeWhile1P Nothing (/='\ESC') <|> "" <$ ansi)) time1191 = timeparser (many ("" <$ try ansi <|> pure <$> anySingle)) timeparser p = timeIt $ print $ length $ concat $ map (concat . fromJust . parseMaybe p) testdata testdata = concat $ replicate 10000 [ "2008-01-01 income assets:bank:checking $1 $1" , "2008-06-01 gift assets:bank:checking $1 $2" , "2008-06-02 save assets:bank:saving $1 $3" , " assets:bank:checking ..m$-1\ESC[m\ESC[m $2" , "2008-06-03 eat & shop assets:cash ..m$-2\ESC[m\ESC[m 0" , "2008-12-31 pay off assets:bank:checking ..m$-1\ESC[m\ESC[m ..m$-1\ESC[m\ESC[m" ] ghci> time118ish 4560000 CPU time: 0.17s ghci> time119 4560000 CPU time: 0.91s ghci> time1191 4560000 CPU time: 2.76s Possibly a more careful parser could beat regexReplace. Note the latter does memoisation, which could be faster and/or could also use more resident memory in some situations. Ideally we would calculate all widths before adding ANSI colour codes, so we wouldn't have to wastefully strip them. 
--- hledger-lib/Hledger/Utils/String.hs | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/hledger-lib/Hledger/Utils/String.hs b/hledger-lib/Hledger/Utils/String.hs index a3d7e3eba..817c4799f 100644 --- a/hledger-lib/Hledger/Utils/String.hs +++ b/hledger-lib/Hledger/Utils/String.hs @@ -48,15 +48,14 @@ module Hledger.Utils.String ( ) where -import Data.Char (isDigit, isSpace, toLower, toUpper) +import Data.Char (isSpace, toLower, toUpper) import Data.List (intercalate, transpose) -import Text.Megaparsec (Parsec, (<|>), (<?>), anySingle, between, many, noneOf, - oneOf, parseMaybe, sepBy, takeWhileP, try) -import Text.Megaparsec.Char (char, string) +import Text.Megaparsec ((<|>), between, many, noneOf, sepBy) +import Text.Megaparsec.Char (char) import Text.Printf (printf) import Hledger.Utils.Parse - +import Hledger.Utils.Regex (toRegex', regexReplace) -- | Take elements from the end of a list. takeEnd n l = go (drop n l) l @@ -342,14 +341,10 @@ strWidth = maximum . (0:) . map (foldr (\a b -> charWidth a + b) 0) . lines . 
st -- >>> stripAnsi "\ESC[31m-1\ESC[m" -- "-1" stripAnsi :: String -> String -stripAnsi s = case parseMaybe (many $ "" <$ try ansi <|> pure <$> anySingle) s of - Nothing -> error "Bad ansi escape" -- PARTIAL: should not happen - Just xs -> concat xs - where - -- This parses lots of invalid ANSI escape codes, but that should be fine - ansi = string "\ESC[" *> digitSemicolons *> suffix <?> "ansi" :: Parsec CustomErr String Char - digitSemicolons = takeWhileP Nothing (\c -> isDigit c || c == ';') - suffix = oneOf ['A', 'B', 'C', 'D', 'H', 'J', 'K', 'f', 'm', 's', 'u'] +stripAnsi s = either err id $ regexReplace ansire "" s + where + err = error "stripAnsi: invalid replacement pattern" -- PARTIAL, shouldn't happen + ansire = toRegex' "\ESC\\[([0-9]+;)*([0-9]+)?[ABCDHJKfmsu]" -- PARTIAL, should succeed -- | Get the designated render width of a character: 0 for a combining -- character, 1 for a regular character, 2 for a wide character.