lib: when the text encoding is UTF-8, ignore any UTF-8 BOM prefix

PayPal's new CSV has this BOM prefix (at the time of writing),
which was causing a confusing parse error.
This commit is contained in:
Simon Michael 2018-01-04 16:29:23 -08:00
parent 81e964502b
commit fedf36e823

View File

@ -33,7 +33,7 @@ module Hledger.Utils (---- provide these frequently used modules - or not, for c
-- the rest need to be done in each module I think -- the rest need to be done in each module I think
) )
where where
import Control.Monad (liftM) import Control.Monad (liftM, when)
-- import Data.Char -- import Data.Char
import Data.Default import Data.Default
import Data.List import Data.List
@ -150,12 +150,14 @@ firstJust ms = case dropWhile (==Nothing) ms of
[] -> Nothing [] -> Nothing
(md:_) -> md (md:_) -> md
-- | Read text from a file, handling any of the usual line ending conventions. -- | Read text from a file,
-- handling any of the usual line ending conventions,
-- using the system locale's text encoding,
-- ignoring any UTF-8 BOM prefix (as seen in PayPal's 2018 CSV, e.g.) if that encoding is UTF-8.
readFilePortably :: FilePath -> IO Text readFilePortably :: FilePath -> IO Text
readFilePortably f = openFile f ReadMode >>= readHandlePortably readFilePortably f = openFile f ReadMode >>= readHandlePortably
-- | Read text from a file, or from standard input if the path is "-", -- | Like readFilePortably, but read from standard input if the path is "-".
-- handling any of the usual line ending conventions.
readFileOrStdinPortably :: String -> IO Text readFileOrStdinPortably :: String -> IO Text
readFileOrStdinPortably f = openFileOrStdin f ReadMode >>= readHandlePortably readFileOrStdinPortably f = openFileOrStdin f ReadMode >>= readHandlePortably
where where
@ -166,6 +168,9 @@ readFileOrStdinPortably f = openFileOrStdin f ReadMode >>= readHandlePortably
readHandlePortably :: Handle -> IO Text readHandlePortably :: Handle -> IO Text
readHandlePortably h = do readHandlePortably h = do
hSetNewlineMode h universalNewlineMode hSetNewlineMode h universalNewlineMode
menc <- hGetEncoding h
when (fmap show menc == Just "UTF-8") $ -- XXX no Eq instance, rely on Show
hSetEncoding h utf8_bom
T.hGetContents h T.hGetContents h
-- | Total version of maximum, for integral types, giving 0 for an empty list. -- | Total version of maximum, for integral types, giving 0 for an empty list.