diff --git a/hledger-lib/Hledger/Read.hs b/hledger-lib/Hledger/Read.hs index 95239466c..d89296791 100644 --- a/hledger-lib/Hledger/Read.hs +++ b/hledger-lib/Hledger/Read.hs @@ -102,6 +102,7 @@ module Hledger.Read ( -- * Easy journal parsing readJournal', + readJournal'', readJournalFile', readJournalFiles', orDieTrying, @@ -125,7 +126,7 @@ module Hledger.Read ( --- ** imports import qualified Control.Exception as C -import Control.Monad (unless, when, forM) +import Control.Monad (unless, when, forM, (<=<)) import "mtl" Control.Monad.Except (ExceptT(..), runExceptT, liftEither) import Control.Monad.IO.Class (MonadIO, liftIO) import Data.Default (def) @@ -145,7 +146,7 @@ import System.Environment (getEnv) import System.Exit (exitFailure) import System.FilePath ((<.>), (), splitDirectories, splitFileName, takeFileName) import System.Info (os) -import System.IO (hPutStr, stderr) +import System.IO (Handle, hPutStr, stderr) import Hledger.Data.Dates (getCurrentDay, parsedateM, showDate) import Hledger.Data.Types @@ -205,7 +206,7 @@ type PrefixedFilePath = FilePath -- | @readJournal iopts mfile txt@ -- --- Read a Journal from some text, with strict checks if enabled, +-- Read a Journal from some handle, with strict checks if enabled, -- or return an error message. -- -- The reader (data format) is chosen based on, in this order: @@ -219,11 +220,11 @@ type PrefixedFilePath = FilePath -- If none of these is available, or if the reader name is unrecognised, -- we use the journal reader (for predictability). -- -readJournal :: InputOpts -> Maybe FilePath -> Text -> ExceptT String IO Journal -readJournal iopts@InputOpts{strict_, _defer} mpath txt = do +readJournal :: InputOpts -> Maybe FilePath -> Handle -> ExceptT String IO Journal +readJournal iopts@InputOpts{strict_, _defer} mpath hdl = do let r :: Reader IO = fromMaybe JournalReader.reader $ findReader (mformat_ iopts) mpath dbg6IO "readJournal: trying reader" (rFormat r) - j <- rReadFn r iopts (fromMaybe "(string)" mpath) txt + j <- rReadFn r iopts (fromMaybe "(string)" mpath) hdl when (strict_ && not _defer) $ liftEither $ journalStrictChecks j return j @@ -264,11 +265,11 @@ readJournalFileAndLatestDates iopts prefixedfile = do (mfmt, f) = splitReaderPrefix prefixedfile iopts' = iopts{mformat_=asum [mfmt, mformat_ iopts]} liftIO $ requireJournalFileExists f - t <- + h <- traceOrLogAt 6 ("readJournalFile: "++takeFileName f) $ - liftIO $ readFileOrStdinPortably f + liftIO $ openFileOrStdin f -- <- T.readFile f -- or without line ending translation, for testing - j <- readJournal iopts' (Just f) t + j <- readJournal iopts' (Just f) h if new_ iopts then do ds <- liftIO $ previousLatestDates f @@ -313,9 +314,14 @@ readJournalFilesAndLatestDates iopts pfs = do -- | An easy version of 'readJournal' which assumes default options, and fails -- in the IO monad. -readJournal' :: Text -> IO Journal +readJournal' :: Handle -> IO Journal readJournal' = orDieTrying . readJournal definputopts Nothing +-- | An even easier version of 'readJournal' which additionally to 'readJournal'' +-- also takes a 'Text' instead of a 'Handle'. +readJournal'' :: Text -> IO Journal +readJournal'' = readJournal' <=< inputToHandle + -- | An easy version of 'readJournalFile' which assumes default options, and fails -- in the IO monad. readJournalFile' :: PrefixedFilePath -> IO Journal diff --git a/hledger-lib/Hledger/Read/Common.hs b/hledger-lib/Hledger/Read/Common.hs index 493a4fae6..d54776efd 100644 --- a/hledger-lib/Hledger/Read/Common.hs +++ b/hledger-lib/Hledger/Read/Common.hs @@ -34,6 +34,7 @@ module Hledger.Read.Common ( HasInputOpts(..), definputopts, rawOptsToInputOpts, + handleReadFnToTextReadFn, -- * parsing utilities parseAndFinaliseJournal, @@ -148,6 +149,7 @@ import Data.Time.Clock.POSIX (getPOSIXTime) import Data.Time.LocalTime (LocalTime(..), TimeOfDay(..)) import Data.Word (Word8) import System.FilePath (takeFileName) +import System.IO (Handle) import Text.Megaparsec import Text.Megaparsec.Char (char, char', digitChar, newline, string) import Text.Megaparsec.Char.Lexer (decimal) @@ -179,9 +181,9 @@ data Reader m = Reader { ,rExtensions :: [String] -- The entry point for reading this format, accepting input options, file - -- path for error messages and file contents, producing an exception-raising IO + -- path for error messages and file contents via the handle, producing an exception-raising IO -- action that produces a journal or error message. - ,rReadFn :: InputOpts -> FilePath -> Text -> ExceptT String IO Journal + ,rReadFn :: InputOpts -> FilePath -> Handle -> ExceptT String IO Journal -- The actual megaparsec parser called by the above, in case -- another parser (includedirectivep) wants to use it directly. @@ -231,6 +233,10 @@ rawOptsToInputOpts day usecoloronstdout postingaccttags rawopts = ,_ioDay = day } +handleReadFnToTextReadFn :: (InputOpts -> FilePath -> Text -> ExceptT String IO Journal) -> InputOpts -> FilePath -> Handle -> ExceptT String IO Journal +handleReadFnToTextReadFn p iopts fp = + p iopts fp <=< lift . readHandlePortably + -- | Get the date span from --forecast's PERIODEXPR argument, if any. -- This will fail with a usage error if the period expression cannot be parsed, -- or if it contains a report interval. diff --git a/hledger-lib/Hledger/Read/CsvReader.hs b/hledger-lib/Hledger/Read/CsvReader.hs index da2e3969d..c957865ab 100644 --- a/hledger-lib/Hledger/Read/CsvReader.hs +++ b/hledger-lib/Hledger/Read/CsvReader.hs @@ -28,7 +28,7 @@ where import Prelude hiding (Applicative(..)) import Control.Monad.Except (ExceptT(..), liftEither) import Control.Monad.IO.Class (MonadIO) -import Data.Text (Text) +import System.IO (Handle) import Hledger.Data import Hledger.Utils @@ -54,10 +54,10 @@ reader sep = Reader -- This file path is normally the CSV(/SSV/TSV) data file, and a corresponding rules file is inferred. -- But it can also be the rules file, in which case the corresponding data file is inferred. -- This does not check balance assertions. -parse :: SepFormat -> InputOpts -> FilePath -> Text -> ExceptT String IO Journal -parse sep iopts f t = do +parse :: SepFormat -> InputOpts -> FilePath -> Handle -> ExceptT String IO Journal +parse sep iopts f h = do let mrulesfile = mrules_file_ iopts - readJournalFromCsv (Right <$> mrulesfile) f t (Just sep) + readJournalFromCsv (Right <$> mrulesfile) f h (Just sep) -- apply any command line account aliases. Can fail with a bad replacement pattern. >>= liftEither . journalApplyAliases (aliasesFromOpts iopts) -- journalFinalise assumes the journal's items are @@ -65,7 +65,7 @@ parse sep iopts f t = do -- But here they are already properly ordered. So we'd -- better preemptively reverse them once more. XXX inefficient . journalReverse - >>= journalFinalise iopts{balancingopts_=(balancingopts_ iopts){ignore_assertions_=True}} f t + >>= journalFinalise iopts{balancingopts_=(balancingopts_ iopts){ignore_assertions_=True}} f "" --- ** tests diff --git a/hledger-lib/Hledger/Read/JournalReader.hs b/hledger-lib/Hledger/Read/JournalReader.hs index e69cf274a..c848136eb 100644 --- a/hledger-lib/Hledger/Read/JournalReader.hs +++ b/hledger-lib/Hledger/Read/JournalReader.hs @@ -194,7 +194,7 @@ reader :: MonadIO m => Reader m reader = Reader {rFormat = Journal' ,rExtensions = ["journal", "j", "hledger", "ledger"] - ,rReadFn = parse + ,rReadFn = handleReadFnToTextReadFn parse ,rParser = journalp -- no need to add command line aliases like journalp' -- when called as a subparser I think } diff --git a/hledger-lib/Hledger/Read/RulesReader.hs b/hledger-lib/Hledger/Read/RulesReader.hs index cc34e947e..96da0a2be 100644 --- a/hledger-lib/Hledger/Read/RulesReader.hs +++ b/hledger-lib/Hledger/Read/RulesReader.hs @@ -52,6 +52,7 @@ import Control.Monad.State.Strict (StateT, get, modify', evalStateT) import Control.Monad.Trans.Class (lift) import Data.Char (toLower, isDigit, isSpace, isAlphaNum, ord) import Data.Bifunctor (first) +import Data.Encoding (encodingFromStringExplicit) import Data.Functor ((<&>)) import Data.List (elemIndex, mapAccumL, nub, sortOn) #if !MIN_VERSION_base(4,20,0) @@ -69,6 +70,7 @@ import Data.Time ( Day, TimeZone, UTCTime, LocalTime, ZonedTime(ZonedTime), defaultTimeLocale, getCurrentTimeZone, localDay, parseTimeM, utcToLocalTime, localTimeToUTC, zonedTimeToUTC) import Safe (atMay, headMay, lastMay, readMay) import System.FilePath ((), takeDirectory, takeExtension, stripExtension, takeFileName) +import System.IO (Handle, hClose) import qualified Data.Csv as Cassava import qualified Data.Csv.Parser.Megaparsec as CassavaMegaparsec import qualified Data.ByteString as B @@ -116,10 +118,11 @@ getDownloadDir = do -- file's directory. When a glob pattern matches multiple files, the alphabetically -- last is used. (Eg in case of multiple numbered downloads, the highest-numbered -- will be used.) --- The provided text, or a --rules option, are ignored by this reader. +-- The provided handle, or a --rules option, are ignored by this reader. -- Balance assertions are not checked. -parse :: InputOpts -> FilePath -> Text -> ExceptT String IO Journal -parse iopts f _ = do +parse :: InputOpts -> FilePath -> Handle -> ExceptT String IO Journal +parse iopts f h = do + lift $ hClose h -- We don't need it rules <- readRulesFile $ dbg4 "reading rules file" f -- XXX higher-than usual debug level for file reading to bypass excessive noise from elsewhere, normally 6 or 7 mdatafile <- liftIO $ do @@ -139,8 +142,8 @@ parse iopts f _ = do if not (dat=="-" || exists) then return nulljournal -- data file inferred from rules file name was not found else do - t <- liftIO $ readFileOrStdinPortably dat - readJournalFromCsv (Just $ Left rules) dat t Nothing + dath <- liftIO $ openFileOrStdin dat + readJournalFromCsv (Just $ Left rules) dat dath Nothing -- apply any command line account aliases. Can fail with a bad replacement pattern. >>= liftEither . journalApplyAliases (aliasesFromOpts iopts) -- journalFinalise assumes the journal's items are @@ -500,6 +503,7 @@ directivep = (do directives :: [Text] directives = ["source" + ,"encoding" ,"date-format" ,"decimal-mark" ,"separator" @@ -908,9 +912,9 @@ _CSV_READING__________________________________________ = undefined -- -- 4. Return the transactions as a Journal. -- -readJournalFromCsv :: Maybe (Either CsvRules FilePath) -> FilePath -> Text -> Maybe SepFormat -> ExceptT String IO Journal -readJournalFromCsv Nothing "-" _ _ = throwError "please use --rules when reading CSV from stdin" -readJournalFromCsv merulesfile csvfile csvtext sep = do +readJournalFromCsv :: Maybe (Either CsvRules FilePath) -> FilePath -> Handle -> Maybe SepFormat -> ExceptT String IO Journal +readJournalFromCsv Nothing "-" h _ = lift (hClose h) *> throwError "please use --rules when reading CSV from stdin" +readJournalFromCsv merulesfile csvfile csvhandle sep = do -- for now, correctness is the priority here, efficiency not so much rules <- case merulesfile of @@ -919,6 +923,16 @@ readJournalFromCsv merulesfile csvfile csvtext sep = do Nothing -> readRulesFile $ rulesFileFor csvfile dbg6IO "csv rules" rules + -- read csv while being aware of the encoding + mencoding <- do + -- XXX higher-than usual debug level for file reading to bypass excessive noise from elsewhere, normally 6 or 7 + case T.unpack <$> getDirective "encoding" rules of + Just rawenc -> case encodingFromStringExplicit $ dbg4 "raw-encoding" rawenc of + Just enc -> return . Just $ dbg4 "encoding" enc + Nothing -> throwError $ "Invalid encoding: " <> rawenc + Nothing -> return Nothing + csvtext <- lift $ readHandlePortably' mencoding csvhandle + -- convert the csv data to lines and remove all empty/blank lines let csvlines1 = dbg9 "csvlines1" $ filter (not . T.null . T.strip) $ dbg9 "csvlines0" $ T.lines csvtext diff --git a/hledger-lib/Hledger/Read/TimeclockReader.hs b/hledger-lib/Hledger/Read/TimeclockReader.hs index fdfd930e4..3160e9fd2 100644 --- a/hledger-lib/Hledger/Read/TimeclockReader.hs +++ b/hledger-lib/Hledger/Read/TimeclockReader.hs @@ -79,7 +79,7 @@ reader :: MonadIO m => Reader m reader = Reader {rFormat = Timeclock ,rExtensions = ["timeclock"] - ,rReadFn = parse + ,rReadFn = handleReadFnToTextReadFn parse ,rParser = timeclockfilep } diff --git a/hledger-lib/Hledger/Read/TimedotReader.hs b/hledger-lib/Hledger/Read/TimedotReader.hs index 99d19ea37..66257f33e 100644 --- a/hledger-lib/Hledger/Read/TimedotReader.hs +++ b/hledger-lib/Hledger/Read/TimedotReader.hs @@ -68,7 +68,7 @@ reader :: MonadIO m => Reader m reader = Reader {rFormat = Timedot ,rExtensions = ["timedot"] - ,rReadFn = parse + ,rReadFn = handleReadFnToTextReadFn parse ,rParser = timedotp } diff --git a/hledger-lib/Hledger/Reports/PostingsReport.hs b/hledger-lib/Hledger/Reports/PostingsReport.hs index 66b24744b..16a3a090f 100644 --- a/hledger-lib/Hledger/Reports/PostingsReport.hs +++ b/hledger-lib/Hledger/Reports/PostingsReport.hs @@ -301,7 +301,7 @@ tests_PostingsReport = testGroup "PostingsReport" [ ,"postings report with cleared option" ~: do let opts = defreportopts{cleared_=True} - j <- readJournal' sample_journal_str + j <- readJournal'' sample_journal_str (postingsReportAsText opts $ postingsReport opts (queryFromOpts date1 opts) j) `is` unlines ["2008/06/03 eat & shop expenses:food $1 $1" ," expenses:supplies $1 $2" @@ -313,7 +313,7 @@ tests_PostingsReport = testGroup "PostingsReport" [ ,"postings report with uncleared option" ~: do let opts = defreportopts{uncleared_=True} - j <- readJournal' sample_journal_str + j <- readJournal'' sample_journal_str (postingsReportAsText opts $ postingsReport opts (queryFromOpts date1 opts) j) `is` unlines ["2008/01/01 income assets:bank:checking $1 $1" ," income:salary $-1 0" @@ -325,7 +325,7 @@ tests_PostingsReport = testGroup "PostingsReport" [ ,"postings report sorts by date" ~: do - j <- readJournal' $ unlines + j <- readJournal'' $ unlines ["2008/02/02 a" ," b 1" ," c" diff --git a/hledger-lib/Hledger/Utils/IO.hs b/hledger-lib/Hledger/Utils/IO.hs index da163e0cb..91b856ffd 100644 --- a/hledger-lib/Hledger/Utils/IO.hs +++ b/hledger-lib/Hledger/Utils/IO.hs @@ -4,7 +4,9 @@ pretty-printing haskell values, error reporting, time, files, command line parsi terminals, pager output, ANSI colour/styles, etc. -} +{-# LANGUAGE ImplicitParams #-} {-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PackageImports #-} {-# LANGUAGE ScopedTypeVariables #-} @@ -31,11 +33,15 @@ module Hledger.Utils.IO ( expandPath, expandGlob, sortByModTime, + openFileOrStdin, readFileOrStdinPortably, + readFileOrStdinPortably', readFileStrictly, readFilePortably, readHandlePortably, + readHandlePortably', -- hereFileRelative, + inputToHandle, -- * Command line parsing progArgs, @@ -111,6 +117,7 @@ import Data.Char (toLower) import Data.Colour.RGBSpace (RGB(RGB)) import Data.Colour.RGBSpace.HSL (lightness) import Data.Colour.SRGB (sRGB) +import Data.Encoding (DynEncoding) import Data.FileEmbed (makeRelativeToProject, embedStringFile) import Data.Functor ((<&>)) import Data.List hiding (uncons) @@ -136,8 +143,9 @@ import System.FilePath (isRelative, ()) import "Glob" System.FilePath.Glob (glob) import System.Info (os) import System.IO (Handle, IOMode (..), hGetEncoding, hSetEncoding, hSetNewlineMode, openFile, stdin, stdout, stderr, universalNewlineMode, utf8_bom, hIsTerminalDevice, hPutStr, hClose) +import qualified System.IO.Encoding as Enc import System.IO.Unsafe (unsafePerformIO) -import System.Process (CreateProcess(..), StdStream(CreatePipe), shell, waitForProcess, withCreateProcess) +import System.Process (CreateProcess(..), StdStream(CreatePipe), createPipe, shell, waitForProcess, withCreateProcess) import Text.Pretty.Simple (CheckColorTty(..), OutputOptions(..), defaultOutputOptionsDarkBg, defaultOutputOptionsNoColor, pShowOpt, pPrintOpt) import Hledger.Utils.Text (WideBuilder(WideBuilder)) @@ -280,19 +288,39 @@ readFilePortably f = openFile f ReadMode >>= readHandlePortably -- | Like readFilePortably, but read from standard input if the path is "-". readFileOrStdinPortably :: String -> IO T.Text -readFileOrStdinPortably f = openFileOrStdin f ReadMode >>= readHandlePortably - where - openFileOrStdin :: String -> IOMode -> IO Handle - openFileOrStdin "-" _ = return stdin - openFileOrStdin f' m = openFile f' m +readFileOrStdinPortably = readFileOrStdinPortably' Nothing + +-- | Like readFileOrStdinPortably, but take an optional converter. +readFileOrStdinPortably' :: Maybe DynEncoding -> String -> IO T.Text +readFileOrStdinPortably' c f = openFileOrStdin f >>= readHandlePortably' c + +openFileOrStdin :: String -> IO Handle +openFileOrStdin "-" = return stdin +openFileOrStdin f' = openFile f' ReadMode readHandlePortably :: Handle -> IO T.Text -readHandlePortably h = do +readHandlePortably = readHandlePortably' Nothing + +readHandlePortably' :: Maybe DynEncoding -> Handle -> IO T.Text +readHandlePortably' Nothing h = do hSetNewlineMode h universalNewlineMode menc <- hGetEncoding h when (fmap show menc == Just "UTF-8") $ -- XXX no Eq instance, rely on Show hSetEncoding h utf8_bom T.hGetContents h +readHandlePortably' (Just e) h = + -- We need to manually apply the newline mode + -- Since we already have a Text + T.replace "\r\n" "\n" . T.pack <$> let ?enc = e in Enc.hGetContents h + +inputToHandle :: T.Text -> IO Handle +inputToHandle t = do + (r, w) <- createPipe + hSetEncoding r utf8_bom + hSetEncoding w utf8_bom + T.hPutStr w t + hClose w + return r -- | Like embedFile, but takes a path relative to the package directory. embedFileRelative :: FilePath -> Q Exp diff --git a/hledger-lib/hledger-lib.cabal b/hledger-lib/hledger-lib.cabal index 8ef36d797..4e5f69137 100644 --- a/hledger-lib/hledger-lib.cabal +++ b/hledger-lib/hledger-lib.cabal @@ -141,6 +141,7 @@ library , deepseq , directory >=1.2.6.1 , doclayout >=0.3 && <0.6 + , encoding >=0.10 , extra >=1.6.3 , file-embed >=0.0.10 , filepath @@ -201,6 +202,7 @@ test-suite doctest , directory >=1.2.6.1 , doclayout >=0.3 && <0.6 , doctest >=0.18.1 + , encoding >=0.10 , extra >=1.6.3 , file-embed >=0.0.10 , filepath @@ -262,6 +264,7 @@ test-suite unittest , deepseq , directory >=1.2.6.1 , doclayout >=0.3 && <0.6 + , encoding >=0.10 , extra >=1.6.3 , file-embed >=0.0.10 , filepath diff --git a/hledger-lib/package.yaml b/hledger-lib/package.yaml index 7c6e4d898..cdefacfe5 100644 --- a/hledger-lib/package.yaml +++ b/hledger-lib/package.yaml @@ -59,6 +59,7 @@ dependencies: - Decimal >=0.5.1 - directory >=1.2.6.1 - doclayout >=0.3 && <0.6 +- encoding >=0.10 - file-embed >=0.0.10 - filepath - hashtables >=1.2.3.1 diff --git a/hledger-web/Hledger/Web/Test.hs b/hledger-web/Hledger/Web/Test.hs index 280d090f4..cf689ea1a 100644 --- a/hledger-web/Hledger/Web/Test.hs +++ b/hledger-web/Hledger/Web/Test.hs @@ -131,7 +131,7 @@ hledgerWebTest = do rawopts = [("forecast","")] iopts = rawOptsToInputOpts d usecolor True $ mkRawOpts rawopts f = "fake" -- need a non-null filename so forecast transactions get index 0 - pj <- readJournal' (T.pack $ unlines -- PARTIAL: readJournal' should not fail + pj <- readJournal'' (T.pack $ unlines -- PARTIAL: readJournal'' should not fail ["~ monthly" ," assets 10" ," income" diff --git a/hledger-web/Hledger/Web/Widget/Common.hs b/hledger-web/Hledger/Web/Widget/Common.hs index a71ffb664..d54aa1ef3 100644 --- a/hledger-web/Hledger/Web/Widget/Common.hs +++ b/hledger-web/Hledger/Web/Widget/Common.hs @@ -66,7 +66,7 @@ writeJournalTextIfValidAndChanged f t = mapExceptT liftIO $ do -- formatdirectivep, #1194) writeFileWithBackupIfChanged require them. -- XXX klunky. Any equivalent of "hSetNewlineMode h universalNewlineMode" for form posts ? let t' = T.replace "\r" "" t - j <- readJournal definputopts (Just f) t' + j <- readJournal definputopts (Just f) =<< liftIO (inputToHandle t') _ <- liftIO $ j `seq` writeFileWithBackupIfChanged f t' -- Only write backup if the journal didn't error return () diff --git a/hledger/Hledger/Cli/Commands.hs b/hledger/Hledger/Cli/Commands.hs index f4b30ebed..d5eb4cbdb 100644 --- a/hledger/Hledger/Cli/Commands.hs +++ b/hledger/Hledger/Cli/Commands.hs @@ -391,8 +391,8 @@ tests_Commands = testGroup "Commands" [ let ignoresourcepos j = j{jtxns=map (\t -> t{tsourcepos=nullsourcepospair}) (jtxns j)} sameParse str1 str2 = do - j1 <- ignoresourcepos <$> readJournal' str1 -- PARTIAL: - j2 <- ignoresourcepos <$> readJournal' str2 -- PARTIAL: + j1 <- ignoresourcepos <$> readJournal'' str1 -- PARTIAL: + j2 <- ignoresourcepos <$> readJournal'' str2 -- PARTIAL: j1 @?= j2{jlastreadtime=jlastreadtime j1, jfiles=jfiles j1} --, jparsestate=jparsestate j1} sameParse ("2008/12/07 One\n alpha $-1\n beta $1\n" <> @@ -409,19 +409,19 @@ tests_Commands = testGroup "Commands" [ ) ,testCase "preserves \"virtual\" posting type" $ do - j <- readJournal' "apply account test\n2008/12/07 One\n (from) $-1\n (to) $1\n" -- PARTIAL: + j <- readJournal'' "apply account test\n2008/12/07 One\n (from) $-1\n (to) $1\n" -- PARTIAL: let p = headErr $ tpostings $ headErr $ jtxns j -- PARTIAL headErrs succeed because txns & postings provided paccount p @?= "test:from" ptype p @?= VirtualPosting ] ,testCase "alias directive" $ do - j <- readJournal' "!alias expenses = equity:draw:personal\n1/1\n (expenses:food) 1\n" -- PARTIAL: + j <- readJournal'' "!alias expenses = equity:draw:personal\n1/1\n (expenses:food) 1\n" -- PARTIAL: let p = headErr $ tpostings $ headErr $ jtxns j -- PARTIAL headErrs succeed because txns & postings provided paccount p @?= "equity:draw:personal:food" ,testCase "Y default year directive" $ do - j <- readJournal' defaultyear_journal_txt -- PARTIAL: + j <- readJournal'' defaultyear_journal_txt -- PARTIAL: tdate (headErr $ jtxns j) @?= fromGregorian 2009 1 1 -- PARTIAL headErr succeeds because defaultyear_journal_txt has a txn ,testCase "ledgerAccountNames" $ @@ -454,7 +454,7 @@ tests_Commands = testGroup "Commands" [ -- t1 = LocalTime date1 midday {- -samplejournal = readJournal' sample_journal_str +samplejournal = readJournal'' sample_journal_str sample_journal_str = unlines ["; A sample journal file." diff --git a/hledger/Hledger/Cli/Commands/Add.hs b/hledger/Hledger/Cli/Commands/Add.hs index 28ce0d63d..61dd3f727 100644 --- a/hledger/Hledger/Cli/Commands/Add.hs +++ b/hledger/Hledger/Cli/Commands/Add.hs @@ -459,7 +459,7 @@ ensureOneNewlineTerminated = (<>"\n") . T.dropWhileEnd (=='\n') -- | Convert a string of journal data into a register report. registerFromString :: T.Text -> IO TL.Text registerFromString s = do - j <- readJournal' s + j <- readJournal'' s return . postingsReportAsText opts $ postingsReport rspec j where ropts = defreportopts{empty_=True} diff --git a/hledger/Hledger/Cli/Commands/Balance.hs b/hledger/Hledger/Cli/Commands/Balance.hs index fe96c6cd3..98ac48f38 100644 --- a/hledger/Hledger/Cli/Commands/Balance.hs +++ b/hledger/Hledger/Cli/Commands/Balance.hs @@ -1226,7 +1226,7 @@ tests_Balance = testGroup "Balance" [ testGroup "balanceReportAsText" [ testCase "unicode in balance layout" $ do - j <- readJournal' "2009/01/01 * медвежья шкура\n расходы:покупки 100\n актив:наличные\n" + j <- readJournal'' "2009/01/01 * медвежья шкура\n расходы:покупки 100\n актив:наличные\n" let rspec = defreportspec{_rsReportOpts=defreportopts{no_total_=True}} TB.toLazyText (balanceReportAsText (_rsReportOpts rspec) (balanceReport rspec{_rsDay=fromGregorian 2008 11 26} j)) @?= diff --git a/hledger/Hledger/Cli/Commands/Register.hs b/hledger/Hledger/Cli/Commands/Register.hs index 8e5862ff9..3d22b5488 100644 --- a/hledger/Hledger/Cli/Commands/Register.hs +++ b/hledger/Hledger/Cli/Commands/Register.hs @@ -304,7 +304,7 @@ tests_Register = testGroup "Register" [ testGroup "postingsReportAsText" [ testCase "unicode in register layout" $ do - j <- readJournal' "2009/01/01 * медвежья шкура\n расходы:покупки 100\n актив:наличные\n" + j <- readJournal'' "2009/01/01 * медвежья шкура\n расходы:покупки 100\n актив:наличные\n" let rspec = defreportspec (TL.unpack . postingsReportAsText defcliopts $ postingsReport rspec j) @?= diff --git a/hledger/hledger.m4.md b/hledger/hledger.m4.md index 53a4661ee..d3d877c8a 100644 --- a/hledger/hledger.m4.md +++ b/hledger/hledger.m4.md @@ -106,7 +106,7 @@ For more about how to do that on your system, see [Common tasks > Setting LEDGER ## Text encoding -Data files containing non-ascii characters must use UTF-8 encoding. +Data files containing non-ascii characters must use UTF-8 encoding, with the exception being `csv` files (see [`encoding`](#encoding) below). An optional [byte order mark (BOM)](https://www.unicode.org/faq/utf_bom.html#BOM) is allowed, at the beginning of the file (only). Also, your system should be configured with a locale that can decode UTF-8 text. @@ -114,8 +114,6 @@ On some unix systems, you may need set the `LANG` environment variable, eg. You can read more about this in [Unicode characters](#unicode-characters), below. On unix systems you can check a file's encoding with the `file` command. -If you need to import from a UTF-16-encoded CSV file, say, -you can convert it to UTF-8 with the `iconv` command. ## Data formats @@ -3217,6 +3215,7 @@ The following kinds of rule can appear in the rules file, in any order. | | | |-------------------------------------------------|------------------------------------------------------------------------------------------------| | [**`source`**](#source) | optionally declare which file to read data from | +| [**`encoding`**](#encoding) | optionally declare which encoding the data has | | [**`separator`**](#separator) | declare the field separator, instead of relying on file extension | | [**`skip`**](#skip) | skip one or more header lines at start of file | | [**`date-format`**](#date-format) | declare how to parse CSV dates/date-times | @@ -3263,6 +3262,77 @@ source Checking1*.csv See also ["Working with CSV > Reading files specified by rule"](#reading-files-specified-by-rule). +## `encoding` + +```rules +encoding ENCODING +``` + +Specifying `encoding` followed by a valid encoding tells HLedger how to convert a +csv to be able to make use of it. +This is most often useful when getting a csv from a bank as they are sometimes +in an old encoding. + +If none is given, `utf8` is assumed. + +The encoding will be checked case-insensitive with some alternative spellings also allowed. +The full list of valid encodings is: +- ASCII +- UTF8 +- UTF16 +- UTF32 +- ISO88591 +- ISO88592 +- ISO88593 +- ISO88594 +- ISO88595 +- ISO88596 +- ISO88597 +- ISO88598 +- ISO88599 +- ISO885910 +- ISO885911 +- ISO885913 +- ISO885914 +- ISO885915 +- ISO885916 +- CP1250 +- CP1251 +- CP1252 +- CP1253 +- CP1254 +- CP1255 +- CP1256 +- CP1257 +- CP1258 +- KOI8R +- KOI8U +- GB18030 +- MacOSRoman +- JISX0201 +- JISX0208 +- ISO2022JP +- ShiftJIS +- CP437 +- CP737 +- CP775 +- CP850 +- CP852 +- CP855 +- CP857 +- CP860 +- CP861 +- CP862 +- CP863 +- CP864 +- CP865 +- CP866 +- CP869 +- CP874 +- CP932 + +Alternate spellings may be found in the [source code of `encoding`](https://hackage.haskell.org/package/encoding/docs/src/Data.Encoding.html#encodingFromStringExplicit) + ## `separator` You can use the `separator` rule to read other kinds of diff --git a/stack.yaml b/stack.yaml index d2337ec55..696526176 100644 --- a/stack.yaml +++ b/stack.yaml @@ -12,10 +12,11 @@ packages: # To minimise problems, use the versions shipped with GHC 9.10.1 when possible. # See https://github.com/haskell/unix/issues/329. -# extra-deps: +extra-deps: # - base-compat-0.14.0 -# # for hledger-lib +# for hledger-lib +- encoding-0.10 # - Cabal-3.12.0.0 # - Cabal-syntax-3.12.0.0 # #- directory-1.3.8.3 diff --git a/stack8.10.yaml b/stack8.10.yaml index 4535456d2..8c512d8ff 100644 --- a/stack8.10.yaml +++ b/stack8.10.yaml @@ -31,6 +31,7 @@ extra-deps: - text-builder-0.6.7 - text-builder-dev-0.3.3.2 - isomorphism-class-0.1.0.7 +- encoding-0.10 # for hledger: # silence a warning - wizards-1.0.3@rev:3 @@ -50,6 +51,10 @@ nix: pure: false packages: [perl gmp ncurses zlib] +flags: + encoding: + systemEncoding: false # See https://github.com/dmwit/encoding/issues/26 + # ghc-options: # "$locals": -fplugin Debug.Breakpoint diff --git a/stack9.0.yaml b/stack9.0.yaml index 2dfee801c..69352dd8f 100644 --- a/stack9.0.yaml +++ b/stack9.0.yaml @@ -22,6 +22,7 @@ extra-deps: - text-builder-0.6.7 - text-builder-dev-0.3.3.2 - isomorphism-class-0.1.0.7 +- encoding-0.10 # for hledger: # for hledger-ui: - bimap-0.5.0 @@ -39,6 +40,10 @@ nix: pure: false packages: [perl gmp ncurses zlib] +flags: + encoding: + systemEncoding: false # See https://github.com/dmwit/encoding/issues/26 + # ghc-options: # "$locals": -fplugin Debug.Breakpoint diff --git a/stack9.12.yaml b/stack9.12.yaml index 7298d8615..98f1f0a43 100644 --- a/stack9.12.yaml +++ b/stack9.12.yaml @@ -18,7 +18,8 @@ packages: extra-deps: # - base-compat-0.14.0 -# # for hledger-lib + # for hledger-lib +- encoding-0.10 # - Cabal-3.12.0.0 # - Cabal-syntax-3.12.0.0 # #- directory-1.3.8.3 @@ -41,6 +42,10 @@ nix: pure: false packages: [perl gmp ncurses zlib] +flags: + encoding: + systemEncoding: false # See https://github.com/dmwit/encoding/issues/26 + # ghc-options: # "$locals": -Wno-x-partial # "$locals": -fplugin Debug.Breakpoint diff --git a/stack9.2.yaml b/stack9.2.yaml index 433c1e251..322b1f6d3 100644 --- a/stack9.2.yaml +++ b/stack9.2.yaml @@ -13,6 +13,7 @@ extra-deps: - megaparsec-9.3.0 - safe-0.3.21 # for hledger-lib: +- encoding-0.10 # for hledger: # for hledger-ui: - brick-2.3.1 @@ -29,6 +30,10 @@ nix: pure: false packages: [perl gmp ncurses zlib] +flags: + encoding: + systemEncoding: false # See https://github.com/dmwit/encoding/issues/26 + # ghc-options: # "$locals": -fplugin Debug.Breakpoint diff --git a/stack9.4.yaml b/stack9.4.yaml index d2a3c0f59..313d64516 100644 --- a/stack9.4.yaml +++ b/stack9.4.yaml @@ -16,11 +16,16 @@ extra-deps: - vty-crossplatform-0.4.0.0 - vty-unix-0.2.0.0 - vty-windows-0.2.0.2 +- encoding-0.10 nix: pure: false packages: [perl gmp ncurses zlib] +flags: + encoding: + systemEncoding: false # See https://github.com/dmwit/encoding/issues/26 + # ghc-options: # "$locals": -fplugin Debug.Breakpoint diff --git a/stack9.6.yaml b/stack9.6.yaml index 3968045aa..8a25a36a3 100644 --- a/stack9.6.yaml +++ b/stack9.6.yaml @@ -10,12 +10,17 @@ packages: extra-deps: - base-compat-0.14.0 -- vty-windows-0.2.0.1 # not yet in stackage +- vty-windows-0.2.0.1 +- encoding-0.10 nix: pure: false packages: [perl gmp ncurses zlib] +flags: + encoding: + systemEncoding: false # See https://github.com/dmwit/encoding/issues/26 + # ghc-options: # "$locals": -fplugin Debug.Breakpoint diff --git a/stack9.8.yaml b/stack9.8.yaml index dfb0093eb..3c72c3710 100644 --- a/stack9.8.yaml +++ b/stack9.8.yaml @@ -10,11 +10,16 @@ packages: extra-deps: - base-compat-0.14.0 +- encoding-0.10 nix: pure: false packages: [perl gmp ncurses zlib] +flags: + encoding: + systemEncoding: false # See https://github.com/dmwit/encoding/issues/26 + # ghc-options: # "$locals": -Wno-x-partial # "$locals": -fplugin Debug.Breakpoint