lib: Replace some regex functions with parsers.

This commit is contained in:
Stephen Morgan 2020-08-31 22:44:41 +10:00
parent 20b39a5dd0
commit b91b391d08
2 changed files with 31 additions and 22 deletions

View File

@ -41,6 +41,7 @@ where
--- ** imports --- ** imports
import Prelude () import Prelude ()
import "base-compat-batteries" Prelude.Compat hiding (fail) import "base-compat-batteries" Prelude.Compat hiding (fail)
import Control.Applicative (liftA2)
import Control.Exception (IOException, handle, throw) import Control.Exception (IOException, handle, throw)
import Control.Monad (liftM, unless, when) import Control.Monad (liftM, unless, when)
import Control.Monad.Except (ExceptT, throwError) import Control.Monad.Except (ExceptT, throwError)
@ -48,13 +49,13 @@ import qualified Control.Monad.Fail as Fail
import Control.Monad.IO.Class (MonadIO, liftIO) import Control.Monad.IO.Class (MonadIO, liftIO)
import Control.Monad.State.Strict (StateT, get, modify', evalStateT) import Control.Monad.State.Strict (StateT, get, modify', evalStateT)
import Control.Monad.Trans.Class (lift) import Control.Monad.Trans.Class (lift)
import Data.Char (toLower, isDigit, isSpace, isAlphaNum, ord) import Data.Char (toLower, isDigit, isSpace, isAlphaNum, isAscii, ord)
import Data.Bifunctor (first) import Data.Bifunctor (first)
import "base-compat-batteries" Data.List.Compat import "base-compat-batteries" Data.List.Compat
import qualified Data.List.Split as LS (splitOn) import qualified Data.List.Split as LS (splitOn)
import Data.Maybe import Data.Maybe (catMaybes, fromMaybe, isJust)
import Data.MemoUgly (memo) import Data.MemoUgly (memo)
import Data.Ord import Data.Ord (comparing)
import qualified Data.Set as S import qualified Data.Set as S
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as T import qualified Data.Text as T
@ -62,17 +63,17 @@ import qualified Data.Text.Encoding as T
import qualified Data.Text.IO as T import qualified Data.Text.IO as T
import Data.Time.Calendar (Day) import Data.Time.Calendar (Day)
import Data.Time.Format (parseTimeM, defaultTimeLocale) import Data.Time.Format (parseTimeM, defaultTimeLocale)
import Safe import Safe (atMay, headMay, lastMay, readDef, readMay)
import System.Directory (doesFileExist) import System.Directory (doesFileExist)
import System.FilePath import System.FilePath ((</>), takeDirectory, takeExtension, takeFileName)
import qualified Data.Csv as Cassava import qualified Data.Csv as Cassava
import qualified Data.Csv.Parser.Megaparsec as CassavaMP import qualified Data.Csv.Parser.Megaparsec as CassavaMP
import qualified Data.ByteString as B import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy as BL
import Data.Foldable import Data.Foldable (asum, toList)
import Text.Megaparsec hiding (match, parse) import Text.Megaparsec hiding (match, parse)
import Text.Megaparsec.Char import Text.Megaparsec.Char (char, newline, string)
import Text.Megaparsec.Custom import Text.Megaparsec.Custom (customErrorBundlePretty, parseErrorAt)
import Text.Printf (printf) import Text.Printf (printf)
import Hledger.Data import Hledger.Data
@ -834,10 +835,9 @@ validateCsv rules numhdrlines (Right rs) = validate $ applyConditionalSkips $ dr
Nothing -> r:(applyConditionalSkips rest) Nothing -> r:(applyConditionalSkips rest)
Just cnt -> applyConditionalSkips (drop (cnt-1) rest) Just cnt -> applyConditionalSkips (drop (cnt-1) rest)
validate [] = Right [] validate [] = Right []
validate rs@(_first:_) validate rs@(_first:_) = case lessthan2 of
| isJust lessthan2 = let r = fromJust lessthan2 in Just r -> Left $ printf "CSV record %s has less than two fields" (show r)
Left $ printf "CSV record %s has less than two fields" (show r) Nothing -> Right rs
| otherwise = Right rs
where where
lessthan2 = headMay $ filter ((<2).length) rs lessthan2 = headMay $ filter ((<2).length) rs
@ -1199,7 +1199,13 @@ getEffectiveAssignment rules record f = lastMay $ map snd $ assignments
-- | Render a field assignment's template, possibly interpolating referenced -- | Render a field assignment's template, possibly interpolating referenced
-- CSV field values. Outer whitespace is removed from interpolated values. -- CSV field values. Outer whitespace is removed from interpolated values.
renderTemplate :: CsvRules -> CsvRecord -> FieldTemplate -> String renderTemplate :: CsvRules -> CsvRecord -> FieldTemplate -> String
renderTemplate rules record t = replaceAllBy (toRegex' "%[A-z0-9_-]+") (replaceCsvFieldReference rules record) t -- PARTIAL: should not happen renderTemplate rules record t = maybe t concat $ parseMaybe
(many $ takeWhile1P Nothing (/='%')
<|> replaceCsvFieldReference rules record <$> referencep)
t
where
referencep = liftA2 (:) (char '%') (takeWhile1P (Just "reference") isDescriptorChar) :: Parsec CustomErr String String
isDescriptorChar c = isAscii c && (isAlphaNum c || c == '_' || c == '-')
-- | Replace something that looks like a reference to a csv field ("%date" or "%1) -- | Replace something that looks like a reference to a csv field ("%date" or "%1)
-- with that field's value. If it doesn't look like a field reference, or if we -- with that field's value. If it doesn't look like a field reference, or if we

View File

@ -49,14 +49,14 @@ module Hledger.Utils.String (
) where ) where
import Data.Char import Data.Char (isDigit, isSpace, toLower, toUpper)
import Data.List import Data.List (intercalate, transpose)
import Text.Megaparsec import Text.Megaparsec (Parsec, (<|>), (<?>), between, many, noneOf, oneOf,
import Text.Megaparsec.Char parseMaybe, sepBy, takeWhile1P)
import Text.Megaparsec.Char (char, string)
import Text.Printf (printf) import Text.Printf (printf)
import Hledger.Utils.Parse import Hledger.Utils.Parse
import Hledger.Utils.Regex
-- | Take elements from the end of a list. -- | Take elements from the end of a list.
@ -341,12 +341,15 @@ takeWidth w (c:cs) | cw <= w = c:takeWidth (w-cw) cs
-- (not counted), and line breaks (in a multi-line string, the longest -- (not counted), and line breaks (in a multi-line string, the longest
-- line determines the width). -- line determines the width).
strWidth :: String -> Int strWidth :: String -> Int
strWidth "" = 0 strWidth = maximum . (0:) . map (foldr (\a b -> charWidth a + b) 0) . lines . stripAnsi
strWidth s = maximum $ map (foldr (\a b -> charWidth a + b) 0) $ lines s'
where s' = stripAnsi s
stripAnsi :: String -> String stripAnsi :: String -> String
stripAnsi = regexReplace (toRegex' "\ESC\\[([0-9]+;)*([0-9]+)?[ABCDHJKfmsu]") "" -- PARTIAL: should never happen, no backreferences stripAnsi s = maybe s concat $ parseMaybe (many $ takeWhile1P Nothing (/='\ESC') <|> "" <$ ansi) s
where
-- This parses lots of invalid ANSI escape codes, but that should be fine
ansi = string "\ESC[" *> digitSemicolons *> suffix <?> "ansi" :: Parsec CustomErr String Char
digitSemicolons = takeWhile1P Nothing (\c -> isDigit c || c == ';')
suffix = oneOf ['A', 'B', 'C', 'D', 'H', 'J', 'K', 'f', 'm', 's', 'u']
-- | Get the designated render width of a character: 0 for a combining -- | Get the designated render width of a character: 0 for a combining
-- character, 1 for a regular character, 2 for a wide character. -- character, 1 for a regular character, 2 for a wide character.