From f78dc639a59be6466e6da84591258c8011e2ece2 Mon Sep 17 00:00:00 2001
From: Simon Michael <simon@joyful.com>
Date: Thu, 10 Sep 2020 17:46:16 -0700
Subject: [PATCH] fix a slowdown with report rendering in 1.19.1 (#1350)

stripAnsi is called many times during rendering (by strWidth), so
should be fast. It was originally a regex replacement, and more
recently a custom parser. The parser was slower, particularly the one
in 1.19.1. See #1350, and this rough test:

time118ish = timeIt $ print $ length $ concat $ map (fromRight undefined . regexReplace (toRegex' "\ESC\\[([0-9]+;)*([0-9]+)?[ABCDHJKfmsu]") "") testdata
time119    = timeparser (many (takeWhile1P Nothing (/='\ESC') <|> "" <$ ansi))
time1191   = timeparser (many ("" <$ try ansi <|> pure <$> anySingle))
timeparser p = timeIt $ print $ length $ concat $ map (concat . fromJust . parseMaybe p) testdata
testdata = concat $ replicate 10000
    [ "2008-01-01 income               assets:bank:checking            $1            $1"
    , "2008-06-01 gift                 assets:bank:checking            $1            $2"
    , "2008-06-02 save                 assets:bank:saving              $1            $3"
    , "                                assets:bank:checking  ..m$-1\ESC[m\ESC[m            $2"
    , "2008-06-03 eat & shop           assets:cash           ..m$-2\ESC[m\ESC[m             0"
    , "2008-12-31 pay off              assets:bank:checking  ..m$-1\ESC[m\ESC[m  ..m$-1\ESC[m\ESC[m"
    ]

ghci> time118ish
4560000
CPU time:   0.17s
ghci> time119
4560000
CPU time:   0.91s
ghci> time1191
4560000
CPU time:   2.76s

Possibly a more careful parser could beat regexReplace. Note the
latter does memoisation, which could be faster and/or could also use
more resident memory in some situations.

Ideally we would calculate all widths before adding ANSI colour codes,
so we wouldn't have to wastefully strip them.
---
 hledger-lib/Hledger/Utils/String.hs | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/hledger-lib/Hledger/Utils/String.hs b/hledger-lib/Hledger/Utils/String.hs
index a3d7e3eba..817c4799f 100644
--- a/hledger-lib/Hledger/Utils/String.hs
+++ b/hledger-lib/Hledger/Utils/String.hs
@@ -48,15 +48,14 @@ module Hledger.Utils.String (
  ) where
 
 
-import Data.Char (isDigit, isSpace, toLower, toUpper)
+import Data.Char (isSpace, toLower, toUpper)
 import Data.List (intercalate, transpose)
-import Text.Megaparsec (Parsec, (<|>), (<?>), anySingle, between, many, noneOf,
-                        oneOf, parseMaybe, sepBy, takeWhileP, try)
-import Text.Megaparsec.Char (char, string)
+import Text.Megaparsec ((<|>), between, many, noneOf, sepBy)
+import Text.Megaparsec.Char (char)
 import Text.Printf (printf)
 
 import Hledger.Utils.Parse
-
+import Hledger.Utils.Regex (toRegex', regexReplace)
 
 -- | Take elements from the end of a list.
 takeEnd n l = go (drop n l) l
@@ -342,14 +341,10 @@ strWidth = maximum . (0:) . map (foldr (\a b -> charWidth a + b) 0) . lines . st
 -- >>> stripAnsi "\ESC[31m-1\ESC[m"
 -- "-1"
 stripAnsi :: String -> String
-stripAnsi s = case parseMaybe (many $ "" <$ try ansi <|> pure <$> anySingle) s of
-    Nothing -> error "Bad ansi escape"  -- PARTIAL: should not happen
-    Just xs -> concat xs
-  where
-    -- This parses lots of invalid ANSI escape codes, but that should be fine
-    ansi = string "\ESC[" *> digitSemicolons *> suffix <?> "ansi" :: Parsec CustomErr String Char
-    digitSemicolons = takeWhileP Nothing (\c -> isDigit c || c == ';')
-    suffix = oneOf ['A', 'B', 'C', 'D', 'H', 'J', 'K', 'f', 'm', 's', 'u']
+stripAnsi s = either err id $ regexReplace ansire "" s
+ where
+   err    = error "stripAnsi: invalid replacement pattern"      -- PARTIAL, shouldn't happen
+   ansire = toRegex' "\ESC\\[([0-9]+;)*([0-9]+)?[ABCDHJKfmsu]"  -- PARTIAL, should succeed
 
 -- | Get the designated render width of a character: 0 for a combining
 -- character, 1 for a regular character, 2 for a wide character.