From 8492f6cae4b3d2502edb3d89a194293ebfe6a52c Mon Sep 17 00:00:00 2001 From: Simon Michael Date: Thu, 29 Mar 2012 19:06:31 +0000 Subject: [PATCH] fix unicode handling on GHC >= 7.2, unify utf8 IO compatibility layer tests pass again from GHC 6.12.3 to 7.4.1 --- hledger-lib/Hledger/Read.hs | 2 +- hledger-lib/Hledger/Read/CsvReader.hs | 2 +- hledger-lib/Hledger/Read/JournalReader.hs | 2 +- hledger-lib/Hledger/Utils.hs | 50 ++-------- hledger-lib/Hledger/Utils/UTF8.hs | 87 ----------------- hledger-lib/Hledger/Utils/UTF8IOCompat.hs | 114 ++++++++++++++++++++++ hledger-lib/hledger-lib.cabal | 2 +- hledger-web/hledger-web.hs | 2 +- hledger/Hledger/Cli/Add.hs | 2 +- hledger/Hledger/Cli/Balance.hs | 2 +- hledger/Hledger/Cli/Histogram.hs | 2 +- hledger/Hledger/Cli/Options.hs | 2 +- hledger/Hledger/Cli/Print.hs | 2 +- hledger/Hledger/Cli/Register.hs | 2 +- hledger/Hledger/Cli/Stats.hs | 2 +- 15 files changed, 134 insertions(+), 141 deletions(-) delete mode 100644 hledger-lib/Hledger/Utils/UTF8.hs create mode 100644 hledger-lib/Hledger/Utils/UTF8IOCompat.hs diff --git a/hledger-lib/Hledger/Read.hs b/hledger-lib/Hledger/Read.hs index fb97a0363..5ab57dc9f 100644 --- a/hledger-lib/Hledger/Read.hs +++ b/hledger-lib/Hledger/Read.hs @@ -41,7 +41,7 @@ import Hledger.Read.TimelogReader as TimelogReader import Hledger.Read.CsvReader as CsvReader import Hledger.Utils import Prelude hiding (getContents, writeFile) -import Hledger.Utils.UTF8 (getContents, hGetContents, writeFile) +import Hledger.Utils.UTF8IOCompat (getContents, hGetContents, writeFile) journalEnvVar = "LEDGER_FILE" diff --git a/hledger-lib/Hledger/Read/CsvReader.hs b/hledger-lib/Hledger/Read/CsvReader.hs index e88085f9f..898fff8f4 100644 --- a/hledger-lib/Hledger/Read/CsvReader.hs +++ b/hledger-lib/Hledger/Read/CsvReader.hs @@ -51,7 +51,7 @@ import Text.Printf (hPrintf) import Hledger.Data import Prelude hiding (getContents) -import Hledger.Utils.UTF8 (getContents) +import Hledger.Utils.UTF8IOCompat (getContents) import Hledger.Utils import Hledger.Data.FormatStrings as FormatStrings import Hledger.Read.JournalReader (ledgeraccountname, someamount) diff --git a/hledger-lib/Hledger/Read/JournalReader.hs b/hledger-lib/Hledger/Read/JournalReader.hs index 82b402e22..c60d4ee0a 100644 --- a/hledger-lib/Hledger/Read/JournalReader.hs +++ b/hledger-lib/Hledger/Read/JournalReader.hs @@ -54,7 +54,7 @@ import System.Time (getClockTime) import Hledger.Data import Hledger.Utils import Prelude hiding (readFile) -import Hledger.Utils.UTF8 (readFile) +import Hledger.Utils.UTF8IOCompat (readFile) -- let's get to it diff --git a/hledger-lib/Hledger/Utils.hs b/hledger-lib/Hledger/Utils.hs index 8f2d829a8..5277f45a5 100644 --- a/hledger-lib/Hledger/Utils.hs +++ b/hledger-lib/Hledger/Utils.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE CPP #-} {-| Standard imports and utilities which are useful everywhere, or needed low @@ -19,12 +20,12 @@ module Hledger.Utils (---- provide these frequently used modules - or not, for c -- module Text.Printf, ---- all of this one: module Hledger.Utils, - Debug.Trace.trace - ---- and this for i18n - needs to be done in each module I think: - -- module Hledger.Utils.UTF8 + Debug.Trace.trace, + -- module Hledger.Utils.UTF8IOCompat + SystemString,fromSystemString,toSystemString,error',userError' + -- the rest need to be done in each module I think ) where -import Codec.Binary.UTF8.String as UTF8 (decodeString, encodeString, isUTF8Encoded) import Control.Monad.Error import Data.Char import Data.List @@ -35,15 +36,15 @@ import Data.Tree import Debug.Trace import System.Directory (getHomeDirectory) import System.FilePath(takeDirectory,combine) -import System.Info (os) import Test.HUnit import Text.ParserCombinators.Parsec import Text.Printf import Text.RegexPR -- import qualified Data.Map as Map -- --- import Prelude hiding (readFile,writeFile,getContents,putStr,putStrLn) --- import Hledger.Utils.UTF8 +-- import Prelude hiding (readFile,writeFile,appendFile,getContents,putStr,putStrLn) +-- import Hledger.Utils.UTF8IOCompat (readFile,writeFile,appendFile,getContents,putStr,putStrLn) +import Hledger.Utils.UTF8IOCompat (SystemString,fromSystemString,toSystemString,error',userError') -- strings @@ -183,41 +184,6 @@ fitto w h s = intercalate "\n" $ take h $ rows ++ repeat blankline fit w = take w . (++ repeat ' ') blankline = replicate w ' ' --- encoded platform strings - --- | A platform string is a string value from or for the operating system, --- such as a file path or command-line argument (or environment variable's --- name or value ?). On some platforms (such as unix) these are not real --- unicode strings but have some encoding such as UTF-8. This alias does --- no type enforcement but aids code clarity. -type PlatformString = String - --- | Convert a possibly encoded platform string to a real unicode string. --- We decode the UTF-8 encoding recommended for unix systems --- (cf http://www.dwheeler.com/essays/fixing-unix-linux-filenames.html) --- and leave anything else unchanged. -fromPlatformString :: PlatformString -> String -fromPlatformString s = if UTF8.isUTF8Encoded s then UTF8.decodeString s else s - --- | Convert a unicode string to a possibly encoded platform string. --- On unix we encode with the recommended UTF-8 --- (cf http://www.dwheeler.com/essays/fixing-unix-linux-filenames.html) --- and elsewhere we leave it unchanged. -toPlatformString :: String -> PlatformString -toPlatformString = case os of - "unix" -> UTF8.encodeString - "linux" -> UTF8.encodeString - "darwin" -> UTF8.encodeString - _ -> id - --- | A version of error that's better at displaying unicode. -error' :: String -> a -error' = error . toPlatformString - --- | A version of userError that's better at displaying unicode. -userError' :: String -> IOError -userError' = userError . toPlatformString - -- math difforzero :: (Num a, Ord a) => a -> a -> a diff --git a/hledger-lib/Hledger/Utils/UTF8.hs b/hledger-lib/Hledger/Utils/UTF8.hs deleted file mode 100644 index e9442b39d..000000000 --- a/hledger-lib/Hledger/Utils/UTF8.hs +++ /dev/null @@ -1,87 +0,0 @@ -{- -From pandoc, slightly extended. Example usage: - - import Prelude hiding (readFile,writeFile,getContents,putStr,putStrLn) - import Hledger.Utils.UTF8 (readFile,writeFile,getContents,putStr,putStrLn) - - ----------------------------------------------------------------------- -Copyright (C) 2010 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.UTF8 - Copyright : Copyright (C) 2010 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -UTF-8 aware string IO functions that will work with GHC 6.10 or 6.12. --} -module Hledger.Utils.UTF8 ( readFile - , writeFile - , appendFile - , getContents - , hGetContents - , putStr - , putStrLn - , hPutStr - , hPutStrLn - ) - -where -import qualified Data.ByteString.Lazy as B -import Data.ByteString.Lazy.UTF8 (toString, fromString) -import Prelude hiding (readFile, writeFile, appendFile, getContents, putStr, putStrLn) -import System.IO (Handle) -import Control.Monad (liftM) - -bom :: B.ByteString -bom = B.pack [0xEF, 0xBB, 0xBF] - -stripBOM :: B.ByteString -> B.ByteString -stripBOM s | bom `B.isPrefixOf` s = B.drop 3 s -stripBOM s = s - -readFile :: FilePath -> IO String -readFile = liftM (toString . stripBOM) . B.readFile - -writeFile :: FilePath -> String -> IO () -writeFile f = B.writeFile f . fromString - -appendFile :: FilePath -> String -> IO () -appendFile f = B.appendFile f . fromString - -getContents :: IO String -getContents = liftM (toString . stripBOM) B.getContents - -hGetContents :: Handle -> IO String -hGetContents h = liftM (toString . stripBOM) (B.hGetContents h) - -putStr :: String -> IO () -putStr = B.putStr . fromString - -putStrLn :: String -> IO () -putStrLn = B.putStrLn . fromString - -hPutStr :: Handle -> String -> IO () -hPutStr h = B.hPutStr h . fromString - -hPutStrLn :: Handle -> String -> IO () -hPutStrLn h s = hPutStr h (s ++ "\n") diff --git a/hledger-lib/Hledger/Utils/UTF8IOCompat.hs b/hledger-lib/Hledger/Utils/UTF8IOCompat.hs new file mode 100644 index 000000000..17231839a --- /dev/null +++ b/hledger-lib/Hledger/Utils/UTF8IOCompat.hs @@ -0,0 +1,114 @@ +{-# LANGUAGE CPP #-} +{- + +UTF-8 aware string IO functions that will work across multiple platforms +and GHC versions. Includes code from Text.Pandoc.UTF8 ((C) 2010 John +MacFarlane ). + +Example usage: + + import Prelude hiding (readFile,writeFile,appendFile,getContents,putStr,putStrLn) + import UTF8IOCompat (readFile,writeFile,appendFile,getContents,putStr,putStrLn) + import UTF8IOCompat (SystemString,fromSystemString,toSystemString,error',userError') + +-} + +module Hledger.Utils.UTF8IOCompat ( + readFile, + writeFile, + appendFile, + getContents, + hGetContents, + putStr, + putStrLn, + hPutStr, + hPutStrLn, + -- + SystemString, + fromSystemString, + toSystemString, + error', + userError', +) +where + +import qualified Data.ByteString.Lazy as B +import Data.ByteString.Lazy.UTF8 (toString, fromString) +import Prelude hiding (readFile, writeFile, appendFile, getContents, putStr, putStrLn) +import System.IO (Handle) +import Control.Monad (liftM) +#if __GLASGOW_HASKELL__ < 702 +import Codec.Binary.UTF8.String as UTF8 (decodeString, encodeString, isUTF8Encoded) +import System.Info (os) +#endif + +bom :: B.ByteString +bom = B.pack [0xEF, 0xBB, 0xBF] + +stripBOM :: B.ByteString -> B.ByteString +stripBOM s | bom `B.isPrefixOf` s = B.drop 3 s +stripBOM s = s + +readFile :: FilePath -> IO String +readFile = liftM (toString . stripBOM) . B.readFile + +writeFile :: FilePath -> String -> IO () +writeFile f = B.writeFile f . fromString + +appendFile :: FilePath -> String -> IO () +appendFile f = B.appendFile f . fromString + +getContents :: IO String +getContents = liftM (toString . stripBOM) B.getContents + +hGetContents :: Handle -> IO String +hGetContents h = liftM (toString . stripBOM) (B.hGetContents h) + +putStr :: String -> IO () +putStr = B.putStr . fromString + +putStrLn :: String -> IO () +putStrLn = B.putStrLn . fromString + +hPutStr :: Handle -> String -> IO () +hPutStr h = B.hPutStr h . fromString + +hPutStrLn :: Handle -> String -> IO () +hPutStrLn h s = hPutStr h (s ++ "\n") + +-- | A string received from or being passed to the operating system, such +-- as a file path, command-line argument, or environment variable name or +-- value. With GHC versions before 7.2 on some platforms (posix) these are +-- typically encoded. When converting, we assume the encoding is UTF-8 (cf +-- http://www.dwheeler.com/essays/fixing-unix-linux-filenames.html#UTF8). +type SystemString = String + +-- | Convert a system string to an ordinary string, decoding from UTF-8 if +-- it appears to be UTF8-encoded and GHC version is less than 7.2. +fromSystemString :: SystemString -> String +#if __GLASGOW_HASKELL__ < 702 +fromSystemString s = if UTF8.isUTF8Encoded s then UTF8.decodeString s else s +#else +fromSystemString = id +#endif + +-- | Convert a unicode string to a system string, encoding with UTF-8 if +-- we are on a posix platform with GHC < 7.2. +toSystemString :: String -> SystemString +#if __GLASGOW_HASKELL__ < 702 +toSystemString = case os of + "unix" -> UTF8.encodeString + "linux" -> UTF8.encodeString + "darwin" -> UTF8.encodeString + _ -> id +#else +toSystemString = id +#endif + +-- | A SystemString-aware version of error. +error' :: String -> a +error' = error . toSystemString + +-- | A SystemString-aware version of userError. +userError' :: String -> IOError +userError' = userError . toSystemString diff --git a/hledger-lib/hledger-lib.cabal b/hledger-lib/hledger-lib.cabal index 9baefdfe4..f684f2b80 100644 --- a/hledger-lib/hledger-lib.cabal +++ b/hledger-lib/hledger-lib.cabal @@ -52,7 +52,7 @@ library Hledger.Read.Utils Hledger.Reports Hledger.Utils - Hledger.Utils.UTF8 + Hledger.Utils.UTF8IOCompat Build-Depends: base >= 3 && < 5 ,bytestring diff --git a/hledger-web/hledger-web.hs b/hledger-web/hledger-web.hs index 26fda47e6..186ca6c13 100644 --- a/hledger-web/hledger-web.hs +++ b/hledger-web/hledger-web.hs @@ -26,7 +26,7 @@ import Yesod.Logger (makeLogger) import Hledger import Hledger.Cli hiding (progname,prognameandversion) import Prelude hiding (putStrLn) -import Hledger.Utils.UTF8 (putStrLn) +import Hledger.Utils.UTF8IOCompat (putStrLn) import Hledger.Web diff --git a/hledger/Hledger/Cli/Add.hs b/hledger/Hledger/Cli/Add.hs index a3da7617b..4cff4216b 100644 --- a/hledger/Hledger/Cli/Add.hs +++ b/hledger/Hledger/Cli/Add.hs @@ -29,7 +29,7 @@ import qualified Data.Set as Set import Hledger import Prelude hiding (putStr, putStrLn, appendFile) -import Hledger.Utils.UTF8 (putStr, putStrLn, appendFile) +import Hledger.Utils.UTF8IOCompat (putStr, putStrLn, appendFile) import Hledger.Cli.Options import Hledger.Cli.Register (postingsReportAsText) import Hledger.Cli.Utils diff --git a/hledger/Hledger/Cli/Balance.hs b/hledger/Hledger/Cli/Balance.hs index 7e3bd91da..cf7eec332 100644 --- a/hledger/Hledger/Cli/Balance.hs +++ b/hledger/Hledger/Cli/Balance.hs @@ -105,7 +105,7 @@ import Test.HUnit import Hledger import Prelude hiding (putStr) -import Hledger.Utils.UTF8 (putStr) +import Hledger.Utils.UTF8IOCompat (putStr) import Hledger.Data.FormatStrings import qualified Hledger.Data.FormatStrings as Format import Hledger.Cli.Options diff --git a/hledger/Hledger/Cli/Histogram.hs b/hledger/Hledger/Cli/Histogram.hs index 14b882d0f..da4a9573a 100644 --- a/hledger/Hledger/Cli/Histogram.hs +++ b/hledger/Hledger/Cli/Histogram.hs @@ -15,7 +15,7 @@ import Hledger.Cli.Options import Hledger.Data import Hledger.Reports import Prelude hiding (putStr) -import Hledger.Utils.UTF8 (putStr) +import Hledger.Utils.UTF8IOCompat (putStr) barchar = '*' diff --git a/hledger/Hledger/Cli/Options.hs b/hledger/Hledger/Cli/Options.hs index 9492c0632..b39c0d097 100644 --- a/hledger/Hledger/Cli/Options.hs +++ b/hledger/Hledger/Cli/Options.hs @@ -363,7 +363,7 @@ getEnvSafe v = getEnv v `catch` (\_ -> return "") getDirectoryContentsSafe d = getDirectoryContents d `catch` (\_ -> return []) -- | Convert possibly encoded option values to regular unicode strings. -decodeRawOpts = map (\(name,val) -> (name, fromPlatformString val)) +decodeRawOpts = map (\(name,val) -> (name, fromSystemString val)) -- A hacky workaround for http://code.google.com/p/ndmitchell/issues/detail?id=470 : -- we'd like to permit options before COMMAND as well as after it. diff --git a/hledger/Hledger/Cli/Print.hs b/hledger/Hledger/Cli/Print.hs index d2294e472..f24f3966f 100644 --- a/hledger/Hledger/Cli/Print.hs +++ b/hledger/Hledger/Cli/Print.hs @@ -12,7 +12,7 @@ import Data.List import Hledger import Prelude hiding (putStr) -import Hledger.Utils.UTF8 (putStr) +import Hledger.Utils.UTF8IOCompat (putStr) import Hledger.Cli.Options -- | Print journal transactions in standard format. diff --git a/hledger/Hledger/Cli/Register.hs b/hledger/Hledger/Cli/Register.hs index 92dabe1c3..45355ea37 100644 --- a/hledger/Hledger/Cli/Register.hs +++ b/hledger/Hledger/Cli/Register.hs @@ -18,7 +18,7 @@ import Text.Printf import Hledger import Prelude hiding (putStr) -import Hledger.Utils.UTF8 (putStr) +import Hledger.Utils.UTF8IOCompat (putStr) import Hledger.Cli.Options diff --git a/hledger/Hledger/Cli/Stats.hs b/hledger/Hledger/Cli/Stats.hs index 31704ce12..1527deb65 100644 --- a/hledger/Hledger/Cli/Stats.hs +++ b/hledger/Hledger/Cli/Stats.hs @@ -16,7 +16,7 @@ import qualified Data.Map as Map import Hledger import Hledger.Cli.Options import Prelude hiding (putStr) -import Hledger.Utils.UTF8 (putStr) +import Hledger.Utils.UTF8IOCompat (putStr) -- like Register.summarisePostings