We should now read all text in universal newline mode, so eg journal files with DOS/windows line endings are fine. This also deprecates and disables our IO encoding compatibility layer, which prevented many encoding-related problems with certain platforms and GHC versions. With modern GHC (7.x) this is now hopefully totally unnecessary, but the module remains in place just in case.
127 lines
3.9 KiB
Haskell
127 lines
3.9 KiB
Haskell
{-# LANGUAGE CPP #-}
|
|
{- |
|
|
|
|
UTF-8 aware string IO functions that will work across multiple platforms
|
|
and GHC versions. Includes code from Text.Pandoc.UTF8 ((C) 2010 John
|
|
MacFarlane).
|
|
|
|
Example usage:
|
|
|
|
import Prelude hiding (readFile,writeFile,appendFile,getContents,putStr,putStrLn)
|
|
import UTF8IOCompat (readFile,writeFile,appendFile,getContents,putStr,putStrLn)
|
|
import UTF8IOCompat (SystemString,fromSystemString,toSystemString,error',userError')
|
|
|
|
2013/4/10 update: we now trust that current GHC versions & platforms
|
|
do the right thing, so this file is a no-op and on its way to being removed.
|
|
Not carefully tested.
|
|
|
|
-}
|
|
|
|
module Hledger.Utils.UTF8IOCompat (
|
|
readFile,
|
|
writeFile,
|
|
appendFile,
|
|
getContents,
|
|
hGetContents,
|
|
putStr,
|
|
putStrLn,
|
|
hPutStr,
|
|
hPutStrLn,
|
|
--
|
|
SystemString,
|
|
fromSystemString,
|
|
toSystemString,
|
|
error',
|
|
userError',
|
|
)
|
|
where
|
|
|
|
import Control.Monad (liftM)
|
|
import qualified Data.ByteString.Lazy as B
|
|
import qualified Data.ByteString.Lazy.Char8 as B8
|
|
import qualified Data.ByteString.Lazy.UTF8 as U8 (toString, fromString)
|
|
import Prelude hiding (readFile, writeFile, appendFile, getContents, putStr, putStrLn)
|
|
import System.IO -- (Handle)
|
|
-- #if __GLASGOW_HASKELL__ < 702
|
|
-- import Codec.Binary.UTF8.String as UTF8 (decodeString, encodeString, isUTF8Encoded)
|
|
-- import System.Info (os)
|
|
-- #endif
|
|
|
|
-- bom :: B.ByteString
|
|
-- bom = B.pack [0xEF, 0xBB, 0xBF]
|
|
|
|
-- stripBOM :: B.ByteString -> B.ByteString
|
|
-- stripBOM s | bom `B.isPrefixOf` s = B.drop 3 s
|
|
-- stripBOM s = s
|
|
|
|
-- readFile :: FilePath -> IO String
|
|
-- readFile = liftM (U8.toString . stripBOM) . B.readFile
|
|
|
|
-- writeFile :: FilePath -> String -> IO ()
|
|
-- writeFile f = B.writeFile f . U8.fromString
|
|
|
|
-- appendFile :: FilePath -> String -> IO ()
|
|
-- appendFile f = B.appendFile f . U8.fromString
|
|
|
|
-- getContents :: IO String
|
|
-- getContents = liftM (U8.toString . stripBOM) B.getContents
|
|
|
|
-- hGetContents :: Handle -> IO String
|
|
-- hGetContents h = liftM (U8.toString . stripBOM) (B.hGetContents h)
|
|
|
|
-- putStr :: String -> IO ()
|
|
-- putStr = bs_putStr . U8.fromString
|
|
|
|
-- putStrLn :: String -> IO ()
|
|
-- putStrLn = bs_putStrLn . U8.fromString
|
|
|
|
-- hPutStr :: Handle -> String -> IO ()
|
|
-- hPutStr h = bs_hPutStr h . U8.fromString
|
|
|
|
-- hPutStrLn :: Handle -> String -> IO ()
|
|
-- hPutStrLn h = bs_hPutStrLn h . U8.fromString
|
|
|
|
-- -- span GHC versions including 6.12.3 - 7.4.1:
|
|
-- bs_putStr = B8.putStr
|
|
-- bs_putStrLn = B8.putStrLn
|
|
-- bs_hPutStr = B8.hPut
|
|
-- bs_hPutStrLn h bs = B8.hPut h bs >> B8.hPut h (B.singleton 0x0a)
|
|
|
|
|
|
-- | A string received from or being passed to the operating system, such
|
|
-- as a file path, command-line argument, or environment variable name or
|
|
-- value. With GHC versions before 7.2 on some platforms (posix) these are
|
|
-- typically encoded. When converting, we assume the encoding is UTF-8 (cf
|
|
-- <http://www.dwheeler.com/essays/fixing-unix-linux-filenames.html#UTF8>).
|
|
type SystemString = String
|
|
|
|
-- | Convert a system string to an ordinary string, decoding from UTF-8 if
|
|
-- it appears to be UTF8-encoded and GHC version is less than 7.2.
|
|
fromSystemString :: SystemString -> String
|
|
-- #if __GLASGOW_HASKELL__ < 702
|
|
-- fromSystemString s = if UTF8.isUTF8Encoded s then UTF8.decodeString s else s
|
|
-- #else
|
|
fromSystemString = id
|
|
-- #endif
|
|
|
|
-- | Convert a unicode string to a system string, encoding with UTF-8 if
|
|
-- we are on a posix platform with GHC < 7.2.
|
|
toSystemString :: String -> SystemString
|
|
-- #if __GLASGOW_HASKELL__ < 702
|
|
-- toSystemString = case os of
|
|
-- "unix" -> UTF8.encodeString
|
|
-- "linux" -> UTF8.encodeString
|
|
-- "darwin" -> UTF8.encodeString
|
|
-- _ -> id
|
|
-- #else
|
|
toSystemString = id
|
|
-- #endif
|
|
|
|
-- | A SystemString-aware version of error.
|
|
error' :: String -> a
|
|
error' = error . toSystemString
|
|
|
|
-- | A SystemString-aware version of userError.
|
|
userError' :: String -> IOError
|
|
userError' = userError . toSystemString
|