fix:csv: respect encoding rule when rules file is input file [#2465]
This commit is contained in:
parent
1100c7e62e
commit
9351c70f74
@ -33,7 +33,8 @@ import System.IO (Handle)
|
|||||||
import Hledger.Data
|
import Hledger.Data
|
||||||
import Hledger.Utils
|
import Hledger.Utils
|
||||||
import Hledger.Read.Common (aliasesFromOpts, Reader(..), InputOpts(..), journalFinalise)
|
import Hledger.Read.Common (aliasesFromOpts, Reader(..), InputOpts(..), journalFinalise)
|
||||||
import Hledger.Read.RulesReader (readJournalFromCsv)
|
import Hledger.Read.RulesReader (readJournalFromCsv, getRulesFile, rulesEncoding, readRules)
|
||||||
|
import Control.Monad.Trans (lift)
|
||||||
|
|
||||||
--- ** doctest setup
|
--- ** doctest setup
|
||||||
-- $setup
|
-- $setup
|
||||||
@ -60,8 +61,10 @@ reader sep = Reader
|
|||||||
-- This does not check balance assertions.
|
-- This does not check balance assertions.
|
||||||
parse :: SepFormat -> InputOpts -> FilePath -> Handle -> ExceptT String IO Journal
|
parse :: SepFormat -> InputOpts -> FilePath -> Handle -> ExceptT String IO Journal
|
||||||
parse sep iopts f h = do
|
parse sep iopts f h = do
|
||||||
let mrulesfile = mrules_file_ iopts
|
rules <- readRules $ getRulesFile f (mrules_file_ iopts)
|
||||||
readJournalFromCsv (Right <$> mrulesfile) f h (Just sep)
|
mencoding <- rulesEncoding rules
|
||||||
|
csvtext <- lift $ readHandlePortably' mencoding h
|
||||||
|
readJournalFromCsv rules f csvtext (Just sep)
|
||||||
-- apply any command line account aliases. Can fail with a bad replacement pattern.
|
-- apply any command line account aliases. Can fail with a bad replacement pattern.
|
||||||
>>= liftEither . journalApplyAliases (aliasesFromOpts iopts)
|
>>= liftEither . journalApplyAliases (aliasesFromOpts iopts)
|
||||||
-- journalFinalise assumes the journal's items are
|
-- journalFinalise assumes the journal's items are
|
||||||
|
|||||||
@ -29,13 +29,12 @@ module Hledger.Read.RulesReader (
|
|||||||
-- * Reader
|
-- * Reader
|
||||||
reader,
|
reader,
|
||||||
-- * Misc.
|
-- * Misc.
|
||||||
readJournalFromCsv,
|
|
||||||
-- readRulesFile,
|
|
||||||
-- parseCsvRules,
|
|
||||||
-- validateCsvRules,
|
|
||||||
-- CsvRules,
|
|
||||||
dataFileFor,
|
dataFileFor,
|
||||||
rulesFileFor,
|
rulesFileFor,
|
||||||
|
getRulesFile,
|
||||||
|
readRules,
|
||||||
|
rulesEncoding,
|
||||||
|
readJournalFromCsv,
|
||||||
parseBalanceAssertionType,
|
parseBalanceAssertionType,
|
||||||
-- * Tests
|
-- * Tests
|
||||||
tests_RulesReader,
|
tests_RulesReader,
|
||||||
@ -59,7 +58,7 @@ import qualified Data.ByteString as B
|
|||||||
import qualified Data.ByteString.Lazy as BL
|
import qualified Data.ByteString.Lazy as BL
|
||||||
import qualified Data.Csv as Cassava
|
import qualified Data.Csv as Cassava
|
||||||
import qualified Data.Csv.Parser.Megaparsec as CassavaMegaparsec
|
import qualified Data.Csv.Parser.Megaparsec as CassavaMegaparsec
|
||||||
import Data.Encoding (encodingFromStringExplicit)
|
import Data.Encoding (encodingFromStringExplicit, DynEncoding)
|
||||||
import Data.Either (fromRight)
|
import Data.Either (fromRight)
|
||||||
import Data.Functor ((<&>))
|
import Data.Functor ((<&>))
|
||||||
import Data.List (elemIndex, mapAccumL, nub, sortOn)
|
import Data.List (elemIndex, mapAccumL, nub, sortOn)
|
||||||
@ -168,7 +167,7 @@ parse iopts rulesfile h = do
|
|||||||
-- gives: file pattern, data cleaning/generating command, archive flag
|
-- gives: file pattern, data cleaning/generating command, archive flag
|
||||||
|
|
||||||
-- XXX higher-than usual logging priority for file reading (normally 6 or 7), to bypass excessive noise from elsewhere
|
-- XXX higher-than usual logging priority for file reading (normally 6 or 7), to bypass excessive noise from elsewhere
|
||||||
rules <- readRulesFile $ dbg1 "reading rules file" rulesfile
|
rules <- readRules $ dbg1 "reading rules file" rulesfile
|
||||||
let
|
let
|
||||||
msourcearg = getDirective "source" rules
|
msourcearg = getDirective "source" rules
|
||||||
-- Nothing -> error' $ rulesfile ++ " source rule must specify a file pattern or a command"
|
-- Nothing -> error' $ rulesfile ++ " source rule must specify a file pattern or a command"
|
||||||
@ -219,7 +218,7 @@ parse iopts rulesfile h = do
|
|||||||
(Nothing, _) -> return ()
|
(Nothing, _) -> return ()
|
||||||
|
|
||||||
-- 5. read raw, cleaned or generated data
|
-- 5. read raw, cleaned or generated data
|
||||||
-- needs: file pattern, data file, data command
|
-- needs: file pattern, data file, optional data file encoding, data command
|
||||||
-- gives: clean data (possibly empty)
|
-- gives: clean data (possibly empty)
|
||||||
|
|
||||||
mexistingdatafile <- maybe (return Nothing) (\f -> liftIO $ do
|
mexistingdatafile <- maybe (return Nothing) (\f -> liftIO $ do
|
||||||
@ -233,9 +232,10 @@ parse iopts rulesfile h = do
|
|||||||
return ""
|
return ""
|
||||||
|
|
||||||
-- file found, and maybe a data cleaning command
|
-- file found, and maybe a data cleaning command
|
||||||
(_, Just f, mc) -> -- trace "file found" $
|
(_, Just f, mc) -> do -- trace "file found" $
|
||||||
|
mencoding <- rulesEncoding rules
|
||||||
liftIO $ do
|
liftIO $ do
|
||||||
raw <- openFileOrStdin f >>= readHandlePortably
|
raw <- openFileOrStdin f >>= readHandlePortably' mencoding
|
||||||
maybe (return raw) (\c -> runCommandAsFilter rulesfile (dbg0Msg ("running: "++c) c) raw) mc
|
maybe (return raw) (\c -> runCommandAsFilter rulesfile (dbg0Msg ("running: "++c) c) raw) mc
|
||||||
|
|
||||||
-- no file pattern, but a data generating command
|
-- no file pattern, but a data generating command
|
||||||
@ -247,12 +247,11 @@ parse iopts rulesfile h = do
|
|||||||
error' $ rulesfile ++ " source rule must specify a file pattern or a command"
|
error' $ rulesfile ++ " source rule must specify a file pattern or a command"
|
||||||
|
|
||||||
-- 6. convert the clean data to a (possibly empty) journal
|
-- 6. convert the clean data to a (possibly empty) journal
|
||||||
-- needs: clean data, rules, rules file, data file if any
|
-- needs: clean data, rules, data file if any
|
||||||
-- gives: journal
|
-- gives: journal
|
||||||
|
|
||||||
j <- do
|
j <- do
|
||||||
cleandatah <- liftIO $ inputToHandle cleandata
|
readJournalFromCsv rules (fromMaybe "(cmd)" mdatafile) cleandata Nothing
|
||||||
readJournalFromCsv (Just $ Left rules) (fromMaybe "(cmd)" mdatafile) cleandatah Nothing
|
|
||||||
-- apply any command line account aliases. Can fail with a bad replacement pattern.
|
-- apply any command line account aliases. Can fail with a bad replacement pattern.
|
||||||
>>= liftEither . journalApplyAliases (aliasesFromOpts iopts)
|
>>= liftEither . journalApplyAliases (aliasesFromOpts iopts)
|
||||||
-- journalFinalise assumes the journal's items are
|
-- journalFinalise assumes the journal's items are
|
||||||
@ -389,15 +388,37 @@ dataFileFor = stripExtension "rules"
|
|||||||
rulesFileFor :: FilePath -> FilePath
|
rulesFileFor :: FilePath -> FilePath
|
||||||
rulesFileFor = (++ ".rules")
|
rulesFileFor = (++ ".rules")
|
||||||
|
|
||||||
|
-- | Decide which rules file to use for a CSV file.
-- An explicitly given rules file path always wins. Otherwise the default
-- is the CSV file's path with ".rules" appended (see 'rulesFileFor').
-- If no rules file is given and the CSV file is "-", there is no default
-- to fall back on, so this raises an error (via error', not ExceptT).
getRulesFile :: FilePath -> Maybe FilePath -> FilePath
getRulesFile _ (Just f) = f
getRulesFile csvfile Nothing
  | csvfile == "-" = error' "please use --rules when reading CSV from stdin"  -- PARTIAL
    -- XXX is this bad ? everything else here uses ExceptT
  | otherwise = rulesFileFor csvfile
|
||||||
|
|
||||||
-- | An exception-throwing IO action that reads and validates
|
-- | An exception-throwing IO action that reads and validates
|
||||||
-- the specified CSV rules file (which may include other rules files).
|
-- the specified CSV rules file (which may include other rules files).
|
||||||
readRulesFile :: FilePath -> ExceptT String IO CsvRules
|
readRules :: FilePath -> ExceptT String IO CsvRules
|
||||||
readRulesFile f =
|
readRules f =
|
||||||
liftIO (do
|
liftIO (do
|
||||||
dbg6IO "using conversion rules file" f
|
dbg6IO "using conversion rules file" f
|
||||||
readFilePortably f >>= expandIncludes (takeDirectory f)
|
readFilePortably f >>= expandIncludes (takeDirectory f)
|
||||||
) >>= either throwError return . parseAndValidateCsvRules f
|
) >>= either throwError return . parseAndValidateCsvRules f
|
||||||
|
|
||||||
|
-- | Look up the text encoding requested by the rules' @encoding@ directive.
-- Returns Nothing when the rules contain no such directive.
-- Fails (with throwError) with an "Invalid encoding: ..." message when the
-- directive's value is not recognised by 'encodingFromStringExplicit'.
rulesEncoding :: CsvRules -> ExceptT String IO (Maybe DynEncoding)
rulesEncoding rules =
  traverse resolveEncoding $ T.unpack <$> getDirective "encoding" rules
  where
    -- Turn an encoding name into an encoding, or fail naming the bad value.
    resolveEncoding encname =
      maybe
        (throwError $ "Invalid encoding: " <> encname)
        (return . dbg4 "encoding")
        (encodingFromStringExplicit $ dbg4 "encoding name" encname)
|
||||||
|
|
||||||
-- | Inline all files referenced by include directives in this hledger CSV rules text, recursively.
|
-- | Inline all files referenced by include directives in this hledger CSV rules text, recursively.
|
||||||
-- Included file paths may be relative to the directory of the provided file path.
|
-- Included file paths may be relative to the directory of the provided file path.
|
||||||
-- Unlike with journal files, this is done as a pre-parse step to simplify the CSV rules parser.
|
-- Unlike with journal files, this is done as a pre-parse step to simplify the CSV rules parser.
|
||||||
@ -1167,27 +1188,12 @@ _CSV_READING__________________________________________ = undefined
|
|||||||
--
|
--
|
||||||
-- 4. Return the transactions as a Journal.
|
-- 4. Return the transactions as a Journal.
|
||||||
--
|
--
|
||||||
readJournalFromCsv :: Maybe (Either CsvRules FilePath) -> FilePath -> Handle -> Maybe SepFormat -> ExceptT String IO Journal
|
readJournalFromCsv :: CsvRules -> FilePath -> Text -> Maybe SepFormat -> ExceptT String IO Journal
|
||||||
readJournalFromCsv Nothing "-" h _ = lift (hClose h) *> throwError "please use --rules when reading CSV from stdin"
|
readJournalFromCsv rules csvfile csvtext sep = do
|
||||||
readJournalFromCsv merulesfile csvfile csvhandle sep = do
|
|
||||||
-- for now, correctness is the priority here, efficiency not so much
|
-- for now, correctness is the priority here, efficiency not so much
|
||||||
|
|
||||||
rules <- case merulesfile of
|
|
||||||
Just (Left rs) -> return rs
|
|
||||||
Just (Right rulesfile) -> readRulesFile rulesfile
|
|
||||||
Nothing -> readRulesFile $ rulesFileFor csvfile
|
|
||||||
dbg6IO "csv rules" rules
|
dbg6IO "csv rules" rules
|
||||||
|
|
||||||
-- read csv while being aware of the encoding
|
|
||||||
mencoding <- do
|
|
||||||
-- XXX higher-than usual debug level for file reading to bypass excessive noise from elsewhere, normally 6 or 7
|
|
||||||
case T.unpack <$> getDirective "encoding" rules of
|
|
||||||
Just rawenc -> case encodingFromStringExplicit $ dbg4 "raw-encoding" rawenc of
|
|
||||||
Just enc -> return . Just $ dbg4 "encoding" enc
|
|
||||||
Nothing -> throwError $ "Invalid encoding: " <> rawenc
|
|
||||||
Nothing -> return Nothing
|
|
||||||
csvtext <- lift $ readHandlePortably' mencoding csvhandle
|
|
||||||
|
|
||||||
-- convert the csv data to lines and remove all empty/blank lines
|
-- convert the csv data to lines and remove all empty/blank lines
|
||||||
let csvlines1 = dbg9 "csvlines1" $ filter (not . T.null . T.strip) $ dbg9 "csvlines0" $ T.lines csvtext
|
let csvlines1 = dbg9 "csvlines1" $ filter (not . T.null . T.strip) $ dbg9 "csvlines0" $ T.lines csvtext
|
||||||
|
|
||||||
|
|||||||
18
hledger/test/csv-encoding.test
Normal file
18
hledger/test/csv-encoding.test
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
# * CSV encoding tests
|
||||||
|
|
||||||
|
# ** 1. The encoding rule works when reading the csv file as input.
|
||||||
|
$ hledger -f t.iso8859-1.csv print
|
||||||
|
2025-01-01 éclair
|
||||||
|
expenses:unknown 1
|
||||||
|
income:unknown -1
|
||||||
|
|
||||||
|
>=
|
||||||
|
|
||||||
|
# ** 2. The encoding rule works when reading rules file as input. [#2465]
|
||||||
|
$ hledger -f t.iso8859-1.csv.rules print
|
||||||
|
2025-01-01 éclair
|
||||||
|
expenses:unknown 1
|
||||||
|
income:unknown -1
|
||||||
|
|
||||||
|
>=
|
||||||
|
|
||||||
1
hledger/test/t.iso8859-1.csv
Normal file
1
hledger/test/t.iso8859-1.csv
Normal file
@ -0,0 +1 @@
|
|||||||
|
2025-01-01, éclair, 1
|
||||||
|
3
hledger/test/t.iso8859-1.csv.rules
Normal file
3
hledger/test/t.iso8859-1.csv.rules
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
source ./t.iso8859-1.csv
|
||||||
|
encoding iso8859-1
|
||||||
|
fields date, description, amount
|
||||||
Loading…
Reference in New Issue
Block a user