lib: Replace some regex functions with parsers.
This commit is contained in:
parent
20b39a5dd0
commit
b91b391d08
@ -41,6 +41,7 @@ where
|
|||||||
--- ** imports
|
--- ** imports
|
||||||
import Prelude ()
|
import Prelude ()
|
||||||
import "base-compat-batteries" Prelude.Compat hiding (fail)
|
import "base-compat-batteries" Prelude.Compat hiding (fail)
|
||||||
|
import Control.Applicative (liftA2)
|
||||||
import Control.Exception (IOException, handle, throw)
|
import Control.Exception (IOException, handle, throw)
|
||||||
import Control.Monad (liftM, unless, when)
|
import Control.Monad (liftM, unless, when)
|
||||||
import Control.Monad.Except (ExceptT, throwError)
|
import Control.Monad.Except (ExceptT, throwError)
|
||||||
@ -48,13 +49,13 @@ import qualified Control.Monad.Fail as Fail
|
|||||||
import Control.Monad.IO.Class (MonadIO, liftIO)
|
import Control.Monad.IO.Class (MonadIO, liftIO)
|
||||||
import Control.Monad.State.Strict (StateT, get, modify', evalStateT)
|
import Control.Monad.State.Strict (StateT, get, modify', evalStateT)
|
||||||
import Control.Monad.Trans.Class (lift)
|
import Control.Monad.Trans.Class (lift)
|
||||||
import Data.Char (toLower, isDigit, isSpace, isAlphaNum, ord)
|
import Data.Char (toLower, isDigit, isSpace, isAlphaNum, isAscii, ord)
|
||||||
import Data.Bifunctor (first)
|
import Data.Bifunctor (first)
|
||||||
import "base-compat-batteries" Data.List.Compat
|
import "base-compat-batteries" Data.List.Compat
|
||||||
import qualified Data.List.Split as LS (splitOn)
|
import qualified Data.List.Split as LS (splitOn)
|
||||||
import Data.Maybe
|
import Data.Maybe (catMaybes, fromMaybe, isJust)
|
||||||
import Data.MemoUgly (memo)
|
import Data.MemoUgly (memo)
|
||||||
import Data.Ord
|
import Data.Ord (comparing)
|
||||||
import qualified Data.Set as S
|
import qualified Data.Set as S
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
@ -62,17 +63,17 @@ import qualified Data.Text.Encoding as T
|
|||||||
import qualified Data.Text.IO as T
|
import qualified Data.Text.IO as T
|
||||||
import Data.Time.Calendar (Day)
|
import Data.Time.Calendar (Day)
|
||||||
import Data.Time.Format (parseTimeM, defaultTimeLocale)
|
import Data.Time.Format (parseTimeM, defaultTimeLocale)
|
||||||
import Safe
|
import Safe (atMay, headMay, lastMay, readDef, readMay)
|
||||||
import System.Directory (doesFileExist)
|
import System.Directory (doesFileExist)
|
||||||
import System.FilePath
|
import System.FilePath ((</>), takeDirectory, takeExtension, takeFileName)
|
||||||
import qualified Data.Csv as Cassava
|
import qualified Data.Csv as Cassava
|
||||||
import qualified Data.Csv.Parser.Megaparsec as CassavaMP
|
import qualified Data.Csv.Parser.Megaparsec as CassavaMP
|
||||||
import qualified Data.ByteString as B
|
import qualified Data.ByteString as B
|
||||||
import qualified Data.ByteString.Lazy as BL
|
import qualified Data.ByteString.Lazy as BL
|
||||||
import Data.Foldable
|
import Data.Foldable (asum, toList)
|
||||||
import Text.Megaparsec hiding (match, parse)
|
import Text.Megaparsec hiding (match, parse)
|
||||||
import Text.Megaparsec.Char
|
import Text.Megaparsec.Char (char, newline, string)
|
||||||
import Text.Megaparsec.Custom
|
import Text.Megaparsec.Custom (customErrorBundlePretty, parseErrorAt)
|
||||||
import Text.Printf (printf)
|
import Text.Printf (printf)
|
||||||
|
|
||||||
import Hledger.Data
|
import Hledger.Data
|
||||||
@ -834,10 +835,9 @@ validateCsv rules numhdrlines (Right rs) = validate $ applyConditionalSkips $ dr
|
|||||||
Nothing -> r:(applyConditionalSkips rest)
|
Nothing -> r:(applyConditionalSkips rest)
|
||||||
Just cnt -> applyConditionalSkips (drop (cnt-1) rest)
|
Just cnt -> applyConditionalSkips (drop (cnt-1) rest)
|
||||||
validate [] = Right []
|
validate [] = Right []
|
||||||
validate rs@(_first:_)
|
validate rs@(_first:_) = case lessthan2 of
|
||||||
| isJust lessthan2 = let r = fromJust lessthan2 in
|
Just r -> Left $ printf "CSV record %s has less than two fields" (show r)
|
||||||
Left $ printf "CSV record %s has less than two fields" (show r)
|
Nothing -> Right rs
|
||||||
| otherwise = Right rs
|
|
||||||
where
|
where
|
||||||
lessthan2 = headMay $ filter ((<2).length) rs
|
lessthan2 = headMay $ filter ((<2).length) rs
|
||||||
|
|
||||||
@ -1199,7 +1199,13 @@ getEffectiveAssignment rules record f = lastMay $ map snd $ assignments
|
|||||||
-- | Render a field assignment's template, possibly interpolating referenced
|
-- | Render a field assignment's template, possibly interpolating referenced
|
||||||
-- CSV field values. Outer whitespace is removed from interpolated values.
|
-- CSV field values. Outer whitespace is removed from interpolated values.
|
||||||
renderTemplate :: CsvRules -> CsvRecord -> FieldTemplate -> String
|
renderTemplate :: CsvRules -> CsvRecord -> FieldTemplate -> String
|
||||||
renderTemplate rules record t = replaceAllBy (toRegex' "%[A-z0-9_-]+") (replaceCsvFieldReference rules record) t -- PARTIAL: should not happen
|
renderTemplate rules record t = maybe t concat $ parseMaybe
|
||||||
|
(many $ takeWhile1P Nothing (/='%')
|
||||||
|
<|> replaceCsvFieldReference rules record <$> referencep)
|
||||||
|
t
|
||||||
|
where
|
||||||
|
referencep = liftA2 (:) (char '%') (takeWhile1P (Just "reference") isDescriptorChar) :: Parsec CustomErr String String
|
||||||
|
isDescriptorChar c = isAscii c && (isAlphaNum c || c == '_' || c == '-')
|
||||||
|
|
||||||
-- | Replace something that looks like a reference to a csv field ("%date" or "%1)
|
-- | Replace something that looks like a reference to a csv field ("%date" or "%1)
|
||||||
-- with that field's value. If it doesn't look like a field reference, or if we
|
-- with that field's value. If it doesn't look like a field reference, or if we
|
||||||
|
|||||||
@ -49,14 +49,14 @@ module Hledger.Utils.String (
|
|||||||
) where
|
) where
|
||||||
|
|
||||||
|
|
||||||
import Data.Char
|
import Data.Char (isDigit, isSpace, toLower, toUpper)
|
||||||
import Data.List
|
import Data.List (intercalate, transpose)
|
||||||
import Text.Megaparsec
|
import Text.Megaparsec (Parsec, (<|>), (<?>), between, many, noneOf, oneOf,
|
||||||
import Text.Megaparsec.Char
|
parseMaybe, sepBy, takeWhile1P)
|
||||||
|
import Text.Megaparsec.Char (char, string)
|
||||||
import Text.Printf (printf)
|
import Text.Printf (printf)
|
||||||
|
|
||||||
import Hledger.Utils.Parse
|
import Hledger.Utils.Parse
|
||||||
import Hledger.Utils.Regex
|
|
||||||
|
|
||||||
|
|
||||||
-- | Take elements from the end of a list.
|
-- | Take elements from the end of a list.
|
||||||
@ -341,12 +341,15 @@ takeWidth w (c:cs) | cw <= w = c:takeWidth (w-cw) cs
|
|||||||
-- (not counted), and line breaks (in a multi-line string, the longest
|
-- (not counted), and line breaks (in a multi-line string, the longest
|
||||||
-- line determines the width).
|
-- line determines the width).
|
||||||
strWidth :: String -> Int
|
strWidth :: String -> Int
|
||||||
strWidth "" = 0
|
strWidth = maximum . (0:) . map (foldr (\a b -> charWidth a + b) 0) . lines . stripAnsi
|
||||||
strWidth s = maximum $ map (foldr (\a b -> charWidth a + b) 0) $ lines s'
|
|
||||||
where s' = stripAnsi s
|
|
||||||
|
|
||||||
stripAnsi :: String -> String
|
stripAnsi :: String -> String
|
||||||
stripAnsi = regexReplace (toRegex' "\ESC\\[([0-9]+;)*([0-9]+)?[ABCDHJKfmsu]") "" -- PARTIAL: should never happen, no backreferences
|
stripAnsi s = maybe s concat $ parseMaybe (many $ takeWhile1P Nothing (/='\ESC') <|> "" <$ ansi) s
|
||||||
|
where
|
||||||
|
-- This parses lots of invalid ANSI escape codes, but that should be fine
|
||||||
|
ansi = string "\ESC[" *> digitSemicolons *> suffix <?> "ansi" :: Parsec CustomErr String Char
|
||||||
|
digitSemicolons = takeWhile1P Nothing (\c -> isDigit c || c == ';')
|
||||||
|
suffix = oneOf ['A', 'B', 'C', 'D', 'H', 'J', 'K', 'f', 'm', 's', 'u']
|
||||||
|
|
||||||
-- | Get the designated render width of a character: 0 for a combining
|
-- | Get the designated render width of a character: 0 for a combining
|
||||||
-- character, 1 for a regular character, 2 for a wide character.
|
-- character, 1 for a regular character, 2 for a wide character.
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user