lib: Replace some regex functions with parsers.

This commit is contained in:
Stephen Morgan 2020-08-31 22:44:41 +10:00
parent 20b39a5dd0
commit b91b391d08
2 changed files with 31 additions and 22 deletions

View File

@ -41,6 +41,7 @@ where
--- ** imports
import Prelude ()
import "base-compat-batteries" Prelude.Compat hiding (fail)
import Control.Applicative (liftA2)
import Control.Exception (IOException, handle, throw)
import Control.Monad (liftM, unless, when)
import Control.Monad.Except (ExceptT, throwError)
@ -48,13 +49,13 @@ import qualified Control.Monad.Fail as Fail
import Control.Monad.IO.Class (MonadIO, liftIO)
import Control.Monad.State.Strict (StateT, get, modify', evalStateT)
import Control.Monad.Trans.Class (lift)
import Data.Char (toLower, isDigit, isSpace, isAlphaNum, ord)
import Data.Char (toLower, isDigit, isSpace, isAlphaNum, isAscii, ord)
import Data.Bifunctor (first)
import "base-compat-batteries" Data.List.Compat
import qualified Data.List.Split as LS (splitOn)
import Data.Maybe
import Data.Maybe (catMaybes, fromMaybe, isJust)
import Data.MemoUgly (memo)
import Data.Ord
import Data.Ord (comparing)
import qualified Data.Set as S
import Data.Text (Text)
import qualified Data.Text as T
@ -62,17 +63,17 @@ import qualified Data.Text.Encoding as T
import qualified Data.Text.IO as T
import Data.Time.Calendar (Day)
import Data.Time.Format (parseTimeM, defaultTimeLocale)
import Safe
import Safe (atMay, headMay, lastMay, readDef, readMay)
import System.Directory (doesFileExist)
import System.FilePath
import System.FilePath ((</>), takeDirectory, takeExtension, takeFileName)
import qualified Data.Csv as Cassava
import qualified Data.Csv.Parser.Megaparsec as CassavaMP
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import Data.Foldable
import Data.Foldable (asum, toList)
import Text.Megaparsec hiding (match, parse)
import Text.Megaparsec.Char
import Text.Megaparsec.Custom
import Text.Megaparsec.Char (char, newline, string)
import Text.Megaparsec.Custom (customErrorBundlePretty, parseErrorAt)
import Text.Printf (printf)
import Hledger.Data
@ -834,10 +835,9 @@ validateCsv rules numhdrlines (Right rs) = validate $ applyConditionalSkips $ dr
Nothing -> r:(applyConditionalSkips rest)
Just cnt -> applyConditionalSkips (drop (cnt-1) rest)
validate [] = Right []
validate rs@(_first:_)
| isJust lessthan2 = let r = fromJust lessthan2 in
Left $ printf "CSV record %s has less than two fields" (show r)
| otherwise = Right rs
validate rs@(_first:_) = case lessthan2 of
Just r -> Left $ printf "CSV record %s has less than two fields" (show r)
Nothing -> Right rs
where
lessthan2 = headMay $ filter ((<2).length) rs
@ -1199,7 +1199,13 @@ getEffectiveAssignment rules record f = lastMay $ map snd $ assignments
-- | Render a field assignment's template, possibly interpolating referenced
-- CSV field values. Outer whitespace is removed from interpolated values.
renderTemplate :: CsvRules -> CsvRecord -> FieldTemplate -> String
renderTemplate rules record t = replaceAllBy (toRegex' "%[A-z0-9_-]+") (replaceCsvFieldReference rules record) t -- PARTIAL: should not happen
renderTemplate rules record t = maybe t concat $ parseMaybe
(many $ takeWhile1P Nothing (/='%')
<|> replaceCsvFieldReference rules record <$> referencep)
t
where
referencep = liftA2 (:) (char '%') (takeWhile1P (Just "reference") isDescriptorChar) :: Parsec CustomErr String String
isDescriptorChar c = isAscii c && (isAlphaNum c || c == '_' || c == '-')
-- | Replace something that looks like a reference to a csv field ("%date" or "%1)
-- with that field's value. If it doesn't look like a field reference, or if we

View File

@ -49,14 +49,14 @@ module Hledger.Utils.String (
) where
import Data.Char
import Data.List
import Text.Megaparsec
import Text.Megaparsec.Char
import Data.Char (isDigit, isSpace, toLower, toUpper)
import Data.List (intercalate, transpose)
import Text.Megaparsec (Parsec, (<|>), (<?>), between, many, noneOf, oneOf,
parseMaybe, sepBy, takeWhile1P)
import Text.Megaparsec.Char (char, string)
import Text.Printf (printf)
import Hledger.Utils.Parse
import Hledger.Utils.Regex
-- | Take elements from the end of a list.
@ -341,12 +341,15 @@ takeWidth w (c:cs) | cw <= w = c:takeWidth (w-cw) cs
-- (not counted), and line breaks (in a multi-line string, the longest
-- line determines the width).
strWidth :: String -> Int
strWidth "" = 0
strWidth s = maximum $ map (foldr (\a b -> charWidth a + b) 0) $ lines s'
where s' = stripAnsi s
strWidth = maximum . (0:) . map (foldr (\a b -> charWidth a + b) 0) . lines . stripAnsi
stripAnsi :: String -> String
stripAnsi = regexReplace (toRegex' "\ESC\\[([0-9]+;)*([0-9]+)?[ABCDHJKfmsu]") "" -- PARTIAL: should never happen, no backreferences
stripAnsi s = maybe s concat $ parseMaybe (many $ takeWhile1P Nothing (/='\ESC') <|> "" <$ ansi) s
where
-- This parses lots of invalid ANSI escape codes, but that should be fine
ansi = string "\ESC[" *> digitSemicolons *> suffix <?> "ansi" :: Parsec CustomErr String Char
digitSemicolons = takeWhile1P Nothing (\c -> isDigit c || c == ';')
suffix = oneOf ['A', 'B', 'C', 'D', 'H', 'J', 'K', 'f', 'm', 's', 'u']
-- | Get the designated render width of a character: 0 for a combining
-- character, 1 for a regular character, 2 for a wide character.