fix:csv: respect encoding rule when rules file is input file [#2465]
This commit is contained in:
parent
1100c7e62e
commit
9351c70f74
@ -33,7 +33,8 @@ import System.IO (Handle)
|
||||
import Hledger.Data
|
||||
import Hledger.Utils
|
||||
import Hledger.Read.Common (aliasesFromOpts, Reader(..), InputOpts(..), journalFinalise)
|
||||
import Hledger.Read.RulesReader (readJournalFromCsv)
|
||||
import Hledger.Read.RulesReader (readJournalFromCsv, getRulesFile, rulesEncoding, readRules)
|
||||
import Control.Monad.Trans (lift)
|
||||
|
||||
--- ** doctest setup
|
||||
-- $setup
|
||||
@ -60,8 +61,10 @@ reader sep = Reader
|
||||
-- This does not check balance assertions.
|
||||
parse :: SepFormat -> InputOpts -> FilePath -> Handle -> ExceptT String IO Journal
|
||||
parse sep iopts f h = do
|
||||
let mrulesfile = mrules_file_ iopts
|
||||
readJournalFromCsv (Right <$> mrulesfile) f h (Just sep)
|
||||
rules <- readRules $ getRulesFile f (mrules_file_ iopts)
|
||||
mencoding <- rulesEncoding rules
|
||||
csvtext <- lift $ readHandlePortably' mencoding h
|
||||
readJournalFromCsv rules f csvtext (Just sep)
|
||||
-- apply any command line account aliases. Can fail with a bad replacement pattern.
|
||||
>>= liftEither . journalApplyAliases (aliasesFromOpts iopts)
|
||||
-- journalFinalise assumes the journal's items are
|
||||
|
||||
@ -29,13 +29,12 @@ module Hledger.Read.RulesReader (
|
||||
-- * Reader
|
||||
reader,
|
||||
-- * Misc.
|
||||
readJournalFromCsv,
|
||||
-- readRulesFile,
|
||||
-- parseCsvRules,
|
||||
-- validateCsvRules,
|
||||
-- CsvRules,
|
||||
dataFileFor,
|
||||
rulesFileFor,
|
||||
getRulesFile,
|
||||
readRules,
|
||||
rulesEncoding,
|
||||
readJournalFromCsv,
|
||||
parseBalanceAssertionType,
|
||||
-- * Tests
|
||||
tests_RulesReader,
|
||||
@ -59,7 +58,7 @@ import qualified Data.ByteString as B
|
||||
import qualified Data.ByteString.Lazy as BL
|
||||
import qualified Data.Csv as Cassava
|
||||
import qualified Data.Csv.Parser.Megaparsec as CassavaMegaparsec
|
||||
import Data.Encoding (encodingFromStringExplicit)
|
||||
import Data.Encoding (encodingFromStringExplicit, DynEncoding)
|
||||
import Data.Either (fromRight)
|
||||
import Data.Functor ((<&>))
|
||||
import Data.List (elemIndex, mapAccumL, nub, sortOn)
|
||||
@ -168,7 +167,7 @@ parse iopts rulesfile h = do
|
||||
-- gives: file pattern, data cleaning/generating command, archive flag
|
||||
|
||||
-- XXX higher-than-usual logging priority for file reading (normally 6 or 7), to bypass excessive noise from elsewhere
|
||||
rules <- readRulesFile $ dbg1 "reading rules file" rulesfile
|
||||
rules <- readRules $ dbg1 "reading rules file" rulesfile
|
||||
let
|
||||
msourcearg = getDirective "source" rules
|
||||
-- Nothing -> error' $ rulesfile ++ " source rule must specify a file pattern or a command"
|
||||
@ -219,7 +218,7 @@ parse iopts rulesfile h = do
|
||||
(Nothing, _) -> return ()
|
||||
|
||||
-- 5. read raw, cleaned or generated data
|
||||
-- needs: file pattern, data file, data command
|
||||
-- needs: file pattern, data file, optional data file encoding, data command
|
||||
-- gives: clean data (possibly empty)
|
||||
|
||||
mexistingdatafile <- maybe (return Nothing) (\f -> liftIO $ do
|
||||
@ -233,9 +232,10 @@ parse iopts rulesfile h = do
|
||||
return ""
|
||||
|
||||
-- file found, and maybe a data cleaning command
|
||||
(_, Just f, mc) -> -- trace "file found" $
|
||||
(_, Just f, mc) -> do -- trace "file found" $
|
||||
mencoding <- rulesEncoding rules
|
||||
liftIO $ do
|
||||
raw <- openFileOrStdin f >>= readHandlePortably
|
||||
raw <- openFileOrStdin f >>= readHandlePortably' mencoding
|
||||
maybe (return raw) (\c -> runCommandAsFilter rulesfile (dbg0Msg ("running: "++c) c) raw) mc
|
||||
|
||||
-- no file pattern, but a data generating command
|
||||
@ -247,12 +247,11 @@ parse iopts rulesfile h = do
|
||||
error' $ rulesfile ++ " source rule must specify a file pattern or a command"
|
||||
|
||||
-- 6. convert the clean data to a (possibly empty) journal
|
||||
-- needs: clean data, rules, rules file, data file if any
|
||||
-- needs: clean data, rules, data file if any
|
||||
-- gives: journal
|
||||
|
||||
j <- do
|
||||
cleandatah <- liftIO $ inputToHandle cleandata
|
||||
readJournalFromCsv (Just $ Left rules) (fromMaybe "(cmd)" mdatafile) cleandatah Nothing
|
||||
readJournalFromCsv rules (fromMaybe "(cmd)" mdatafile) cleandata Nothing
|
||||
-- apply any command line account aliases. Can fail with a bad replacement pattern.
|
||||
>>= liftEither . journalApplyAliases (aliasesFromOpts iopts)
|
||||
-- journalFinalise assumes the journal's items are
|
||||
@ -389,15 +388,37 @@ dataFileFor = stripExtension "rules"
|
||||
rulesFileFor :: FilePath -> FilePath
|
||||
rulesFileFor = (++ ".rules")
|
||||
|
||||
-- | Return the given rules file path; or if none is given,
|
||||
-- the default rules file for the given csv file (its path with ".rules" appended);
|
||||
-- or if none is given and the csv file is "-", raise an error asking for --rules.
|
||||
getRulesFile :: FilePath -> Maybe FilePath -> FilePath
|
||||
getRulesFile csvfile mrulesfile =
|
||||
case mrulesfile of
|
||||
Nothing | csvfile == "-" ->
|
||||
error' "please use --rules when reading CSV from stdin" -- PARTIAL
|
||||
-- XXX is this bad? Everything else here uses ExceptT, but this branch calls error'.
|
||||
Nothing -> rulesFileFor csvfile
|
||||
Just f -> f
|
||||
|
||||
-- | An exception-throwing IO action that reads and validates
|
||||
-- the specified CSV rules file (which may include other rules files).
|
||||
readRulesFile :: FilePath -> ExceptT String IO CsvRules
|
||||
readRulesFile f =
|
||||
readRules :: FilePath -> ExceptT String IO CsvRules
|
||||
readRules f =
|
||||
liftIO (do
|
||||
dbg6IO "using conversion rules file" f
|
||||
readFilePortably f >>= expandIncludes (takeDirectory f)
|
||||
) >>= either throwError return . parseAndValidateCsvRules f
|
||||
|
||||
-- | Look up the text encoding named by the @encoding@ rule, if any; returns Nothing when the rules contain no such rule.
|
||||
-- Or throw an "Invalid encoding: NAME" error if the named encoding is not recognised by encodingFromStringExplicit.
|
||||
rulesEncoding :: CsvRules -> ExceptT String IO (Maybe DynEncoding)
|
||||
rulesEncoding rules = do
|
||||
case T.unpack <$> getDirective "encoding" rules of
|
||||
Nothing -> return Nothing
|
||||
Just encstr -> case encodingFromStringExplicit $ dbg4 "encoding name" encstr of
|
||||
Nothing -> throwError $ "Invalid encoding: " <> encstr
|
||||
Just enc -> return . Just $ dbg4 "encoding" enc
|
||||
|
||||
-- | Inline all files referenced by include directives in this hledger CSV rules text, recursively.
|
||||
-- Included file paths may be relative to the directory of the provided file path.
|
||||
-- Unlike with journal files, this is done as a pre-parse step to simplify the CSV rules parser.
|
||||
@ -1167,27 +1188,12 @@ _CSV_READING__________________________________________ = undefined
|
||||
--
|
||||
-- 4. Return the transactions as a Journal.
|
||||
--
|
||||
readJournalFromCsv :: Maybe (Either CsvRules FilePath) -> FilePath -> Handle -> Maybe SepFormat -> ExceptT String IO Journal
|
||||
readJournalFromCsv Nothing "-" h _ = lift (hClose h) *> throwError "please use --rules when reading CSV from stdin"
|
||||
readJournalFromCsv merulesfile csvfile csvhandle sep = do
|
||||
readJournalFromCsv :: CsvRules -> FilePath -> Text -> Maybe SepFormat -> ExceptT String IO Journal
|
||||
readJournalFromCsv rules csvfile csvtext sep = do
|
||||
-- for now, correctness is the priority here, efficiency not so much
|
||||
|
||||
rules <- case merulesfile of
|
||||
Just (Left rs) -> return rs
|
||||
Just (Right rulesfile) -> readRulesFile rulesfile
|
||||
Nothing -> readRulesFile $ rulesFileFor csvfile
|
||||
dbg6IO "csv rules" rules
|
||||
|
||||
-- read csv while being aware of the encoding
|
||||
mencoding <- do
|
||||
-- XXX higher-than-usual debug level for file reading to bypass excessive noise from elsewhere, normally 6 or 7
|
||||
case T.unpack <$> getDirective "encoding" rules of
|
||||
Just rawenc -> case encodingFromStringExplicit $ dbg4 "raw-encoding" rawenc of
|
||||
Just enc -> return . Just $ dbg4 "encoding" enc
|
||||
Nothing -> throwError $ "Invalid encoding: " <> rawenc
|
||||
Nothing -> return Nothing
|
||||
csvtext <- lift $ readHandlePortably' mencoding csvhandle
|
||||
|
||||
-- convert the csv data to lines and remove all empty/blank lines
|
||||
let csvlines1 = dbg9 "csvlines1" $ filter (not . T.null . T.strip) $ dbg9 "csvlines0" $ T.lines csvtext
|
||||
|
||||
|
||||
18
hledger/test/csv-encoding.test
Normal file
18
hledger/test/csv-encoding.test
Normal file
@ -0,0 +1,18 @@
|
||||
# * CSV encoding tests
|
||||
|
||||
# ** 1. The encoding rule works when reading the csv file as input.
|
||||
$ hledger -f t.iso8859-1.csv print
|
||||
2025-01-01 éclair
|
||||
expenses:unknown 1
|
||||
income:unknown -1
|
||||
|
||||
>=
|
||||
|
||||
# ** 2. The encoding rule works when reading the rules file as input. [#2465]
|
||||
$ hledger -f t.iso8859-1.csv.rules print
|
||||
2025-01-01 éclair
|
||||
expenses:unknown 1
|
||||
income:unknown -1
|
||||
|
||||
>=
|
||||
|
||||
1
hledger/test/t.iso8859-1.csv
Normal file
1
hledger/test/t.iso8859-1.csv
Normal file
@ -0,0 +1 @@
|
||||
2025-01-01, éclair, 1
|
||||
|
3
hledger/test/t.iso8859-1.csv.rules
Normal file
3
hledger/test/t.iso8859-1.csv.rules
Normal file
@ -0,0 +1,3 @@
|
||||
source ./t.iso8859-1.csv
|
||||
encoding iso8859-1
|
||||
fields date, description, amount
|
||||
Loading…
Reference in New Issue
Block a user