fix a slowdown with report rendering in 1.19.1 (#1350)
stripAnsi is called many times during rendering (by strWidth), so should be fast. It was originally a regex replacement, and more recently a custom parser. The parser was slower, particularly the one in 1.19.1. See #1350, and this rough test: time118ish = timeIt $ print $ length $ concat $ map (fromRight undefined . regexReplace (toRegex' "\ESC\\[([0-9]+;)*([0-9]+)?[ABCDHJKfmsu]") "") testdata time119 = timeparser (many (takeWhile1P Nothing (/='\ESC') <|> "" <$ ansi)) time1191 = timeparser (many ("" <$ try ansi <|> pure <$> anySingle)) timeparser p = timeIt $ print $ length $ concat $ map (concat . fromJust . parseMaybe p) testdata testdata = concat $ replicate 10000 [ "2008-01-01 income assets🏦checking $1 $1" , "2008-06-01 gift assets🏦checking $1 $2" , "2008-06-02 save assets🏦saving $1 $3" , " assets🏦checking ..m$-1\ESC[m\ESC[m $2" , "2008-06-03 eat & shop assets:cash ..m$-2\ESC[m\ESC[m 0" , "2008-12-31 pay off assets🏦checking ..m$-1\ESC[m\ESC[m ..m$-1\ESC[m\ESC[m" ] ghci> time118ish 4560000 CPU time: 0.17s ghci> time119 4560000 CPU time: 0.91s ghci> time1191 4560000 CPU time: 2.76s Possibly a more careful parser could beat regexReplace. Note the latter does memoisation, which could be faster and/or could also use more resident memory in some situations. Ideally we would calculate all widths before adding ANSI colour codes, so we wouldn't have to wastefully strip them.
This commit is contained in:
		
							parent
							
								
									2d068662c1
								
							
						
					
					
						commit
						f78dc639a5
					
				| @ -48,15 +48,14 @@ module Hledger.Utils.String ( | |||||||
|  ) where |  ) where | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| import Data.Char (isDigit, isSpace, toLower, toUpper) | import Data.Char (isSpace, toLower, toUpper) | ||||||
| import Data.List (intercalate, transpose) | import Data.List (intercalate, transpose) | ||||||
| import Text.Megaparsec (Parsec, (<|>), (<?>), anySingle, between, many, noneOf, | import Text.Megaparsec ((<|>), between, many, noneOf, sepBy) | ||||||
|                         oneOf, parseMaybe, sepBy, takeWhileP, try) | import Text.Megaparsec.Char (char) | ||||||
| import Text.Megaparsec.Char (char, string) |  | ||||||
| import Text.Printf (printf) | import Text.Printf (printf) | ||||||
| 
 | 
 | ||||||
| import Hledger.Utils.Parse | import Hledger.Utils.Parse | ||||||
| 
 | import Hledger.Utils.Regex (toRegex', regexReplace) | ||||||
| 
 | 
 | ||||||
| -- | Take elements from the end of a list. | -- | Take elements from the end of a list. | ||||||
| takeEnd n l = go (drop n l) l | takeEnd n l = go (drop n l) l | ||||||
| @ -342,14 +341,10 @@ strWidth = maximum . (0:) . map (foldr (\a b -> charWidth a + b) 0) . lines . st | |||||||
| -- >>> stripAnsi "\ESC[31m-1\ESC[m" | -- >>> stripAnsi "\ESC[31m-1\ESC[m" | ||||||
| -- "-1" | -- "-1" | ||||||
| stripAnsi :: String -> String | stripAnsi :: String -> String | ||||||
| stripAnsi s = case parseMaybe (many $ "" <$ try ansi <|> pure <$> anySingle) s of | stripAnsi s = either err id $ regexReplace ansire "" s | ||||||
|     Nothing -> error "Bad ansi escape"  -- PARTIAL: should not happen |  | ||||||
|     Just xs -> concat xs |  | ||||||
|  where |  where | ||||||
|     -- This parses lots of invalid ANSI escape codes, but that should be fine |    err    = error "stripAnsi: invalid replacement pattern"      -- PARTIAL, shouldn't happen | ||||||
|     ansi = string "\ESC[" *> digitSemicolons *> suffix <?> "ansi" :: Parsec CustomErr String Char |    ansire = toRegex' "\ESC\\[([0-9]+;)*([0-9]+)?[ABCDHJKfmsu]"  -- PARTIAL, should succeed | ||||||
|     digitSemicolons = takeWhileP Nothing (\c -> isDigit c || c == ';') |  | ||||||
|     suffix = oneOf ['A', 'B', 'C', 'D', 'H', 'J', 'K', 'f', 'm', 's', 'u'] |  | ||||||
| 
 | 
 | ||||||
| -- | Get the designated render width of a character: 0 for a combining | -- | Get the designated render width of a character: 0 for a combining | ||||||
| -- character, 1 for a regular character, 2 for a wide character. | -- character, 1 for a regular character, 2 for a wide character. | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user