fix unicode handling on GHC >= 7.2, unify utf8 IO compatibility layer
tests pass again from GHC 6.12.3 to 7.4.1
This commit is contained in:
		
							parent
							
								
									d4451ce5e3
								
							
						
					
					
						commit
						8492f6cae4
					
				| @ -41,7 +41,7 @@ import Hledger.Read.TimelogReader as TimelogReader | ||||
| import Hledger.Read.CsvReader as CsvReader | ||||
| import Hledger.Utils | ||||
| import Prelude hiding (getContents, writeFile) | ||||
| import Hledger.Utils.UTF8 (getContents, hGetContents, writeFile) | ||||
| import Hledger.Utils.UTF8IOCompat (getContents, hGetContents, writeFile) | ||||
| 
 | ||||
| 
 | ||||
| journalEnvVar           = "LEDGER_FILE" | ||||
|  | ||||
| @ -51,7 +51,7 @@ import Text.Printf (hPrintf) | ||||
| 
 | ||||
| import Hledger.Data | ||||
| import Prelude hiding (getContents) | ||||
| import Hledger.Utils.UTF8 (getContents) | ||||
| import Hledger.Utils.UTF8IOCompat (getContents) | ||||
| import Hledger.Utils | ||||
| import Hledger.Data.FormatStrings as FormatStrings | ||||
| import Hledger.Read.JournalReader (ledgeraccountname, someamount) | ||||
|  | ||||
| @ -54,7 +54,7 @@ import System.Time (getClockTime) | ||||
| import Hledger.Data | ||||
| import Hledger.Utils | ||||
| import Prelude hiding (readFile) | ||||
| import Hledger.Utils.UTF8 (readFile) | ||||
| import Hledger.Utils.UTF8IOCompat (readFile) | ||||
| 
 | ||||
| 
 | ||||
| -- let's get to it | ||||
|  | ||||
| @ -1,3 +1,4 @@ | ||||
| {-# LANGUAGE CPP #-} | ||||
| {-| | ||||
| 
 | ||||
| Standard imports and utilities which are useful everywhere, or needed low | ||||
| @ -19,12 +20,12 @@ module Hledger.Utils (---- provide these frequently used modules - or not, for c | ||||
|                           -- module Text.Printf, | ||||
|                           ---- all of this one: | ||||
|                           module Hledger.Utils, | ||||
|                           Debug.Trace.trace | ||||
|                           ---- and this for i18n - needs to be done in each module I think: | ||||
|                           -- module Hledger.Utils.UTF8 | ||||
|                           Debug.Trace.trace, | ||||
|                           -- module Hledger.Utils.UTF8IOCompat | ||||
|                           SystemString,fromSystemString,toSystemString,error',userError' | ||||
|                           -- the rest need to be done in each module I think | ||||
|                           ) | ||||
| where | ||||
| import Codec.Binary.UTF8.String as UTF8 (decodeString, encodeString, isUTF8Encoded) | ||||
| import Control.Monad.Error | ||||
| import Data.Char | ||||
| import Data.List | ||||
| @ -35,15 +36,15 @@ import Data.Tree | ||||
| import Debug.Trace | ||||
| import System.Directory (getHomeDirectory) | ||||
| import System.FilePath(takeDirectory,combine) | ||||
| import System.Info (os) | ||||
| import Test.HUnit | ||||
| import Text.ParserCombinators.Parsec | ||||
| import Text.Printf | ||||
| import Text.RegexPR | ||||
| -- import qualified Data.Map as Map | ||||
| --  | ||||
| -- import Prelude hiding (readFile,writeFile,getContents,putStr,putStrLn) | ||||
| -- import Hledger.Utils.UTF8 | ||||
| -- import Prelude hiding (readFile,writeFile,appendFile,getContents,putStr,putStrLn) | ||||
| -- import Hledger.Utils.UTF8IOCompat   (readFile,writeFile,appendFile,getContents,putStr,putStrLn) | ||||
| import Hledger.Utils.UTF8IOCompat (SystemString,fromSystemString,toSystemString,error',userError') | ||||
| 
 | ||||
| -- strings | ||||
| 
 | ||||
| @ -183,41 +184,6 @@ fitto w h s = intercalate "\n" $ take h $ rows ++ repeat blankline | ||||
|       fit w = take w . (++ repeat ' ') | ||||
|       blankline = replicate w ' ' | ||||
| 
 | ||||
| -- encoded platform strings | ||||
| 
 | ||||
| -- | A platform string is a string value from or for the operating system, | ||||
| -- such as a file path or command-line argument (or environment variable's | ||||
| -- name or value ?). On some platforms (such as unix) these are not real | ||||
| -- unicode strings but have some encoding such as UTF-8. This alias does | ||||
| -- no type enforcement but aids code clarity. | ||||
| type PlatformString = String | ||||
| 
 | ||||
| -- | Convert a possibly encoded platform string to a real unicode string. | ||||
| -- We decode the UTF-8 encoding recommended for unix systems | ||||
| -- (cf http://www.dwheeler.com/essays/fixing-unix-linux-filenames.html) | ||||
| -- and leave anything else unchanged. | ||||
| fromPlatformString :: PlatformString -> String | ||||
| fromPlatformString s = if UTF8.isUTF8Encoded s then UTF8.decodeString s else s | ||||
| 
 | ||||
| -- | Convert a unicode string to a possibly encoded platform string. | ||||
| -- On unix we encode with the recommended UTF-8 | ||||
| -- (cf http://www.dwheeler.com/essays/fixing-unix-linux-filenames.html) | ||||
| -- and elsewhere we leave it unchanged. | ||||
| toPlatformString :: String -> PlatformString | ||||
| toPlatformString = case os of | ||||
|                      "unix" -> UTF8.encodeString | ||||
|                      "linux" -> UTF8.encodeString | ||||
|                      "darwin" -> UTF8.encodeString | ||||
|                      _ -> id | ||||
| 
 | ||||
| -- | A version of error that's better at displaying unicode. | ||||
| error' :: String -> a | ||||
| error' = error . toPlatformString | ||||
| 
 | ||||
| -- | A version of userError that's better at displaying unicode. | ||||
| userError' :: String -> IOError | ||||
| userError' = userError . toPlatformString | ||||
| 
 | ||||
| -- math | ||||
| 
 | ||||
| difforzero :: (Num a, Ord a) => a -> a -> a | ||||
|  | ||||
| @ -1,87 +0,0 @@ | ||||
| {- | ||||
| From pandoc, slightly extended. Example usage: | ||||
| 
 | ||||
|  import Prelude hiding (readFile,writeFile,getContents,putStr,putStrLn) | ||||
|  import Hledger.Utils.UTF8 (readFile,writeFile,getContents,putStr,putStrLn) | ||||
| 
 | ||||
| 
 | ||||
| ---------------------------------------------------------------------- | ||||
| Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu> | ||||
| 
 | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with this program; if not, write to the Free Software | ||||
| Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA | ||||
| -} | ||||
| 
 | ||||
| {- | | ||||
|    Module      : Text.Pandoc.UTF8 | ||||
|    Copyright   : Copyright (C) 2010 John MacFarlane | ||||
|    License     : GNU GPL, version 2 or above  | ||||
| 
 | ||||
|    Maintainer  : John MacFarlane <jgm@berkeley.edu> | ||||
|    Stability   : alpha | ||||
|    Portability : portable | ||||
| 
 | ||||
| UTF-8 aware string IO functions that will work with GHC 6.10 or 6.12. | ||||
| -} | ||||
| module Hledger.Utils.UTF8 ( readFile | ||||
|                          , writeFile | ||||
|                          , appendFile | ||||
|                          , getContents | ||||
|                          , hGetContents | ||||
|                          , putStr | ||||
|                          , putStrLn | ||||
|                          , hPutStr | ||||
|                          , hPutStrLn | ||||
|                          ) | ||||
| 
 | ||||
| where | ||||
| import qualified Data.ByteString.Lazy as B | ||||
| import Data.ByteString.Lazy.UTF8 (toString, fromString) | ||||
| import Prelude hiding (readFile, writeFile, appendFile, getContents, putStr, putStrLn) | ||||
| import System.IO (Handle) | ||||
| import Control.Monad (liftM) | ||||
| 
 | ||||
| bom :: B.ByteString | ||||
| bom = B.pack [0xEF, 0xBB, 0xBF] | ||||
| 
 | ||||
| stripBOM :: B.ByteString -> B.ByteString | ||||
| stripBOM s | bom `B.isPrefixOf` s = B.drop 3 s | ||||
| stripBOM s = s | ||||
| 
 | ||||
| readFile :: FilePath -> IO String | ||||
| readFile = liftM (toString . stripBOM) . B.readFile | ||||
| 
 | ||||
| writeFile :: FilePath -> String -> IO () | ||||
| writeFile f = B.writeFile f . fromString | ||||
| 
 | ||||
| appendFile :: FilePath -> String -> IO () | ||||
| appendFile f = B.appendFile f . fromString | ||||
| 
 | ||||
| getContents :: IO String | ||||
| getContents = liftM (toString . stripBOM) B.getContents | ||||
| 
 | ||||
| hGetContents :: Handle -> IO String | ||||
| hGetContents h = liftM (toString . stripBOM) (B.hGetContents h) | ||||
| 
 | ||||
| putStr :: String -> IO () | ||||
| putStr = B.putStr . fromString | ||||
| 
 | ||||
| putStrLn :: String -> IO () | ||||
| putStrLn = B.putStrLn . fromString | ||||
| 
 | ||||
| hPutStr :: Handle -> String -> IO () | ||||
| hPutStr h = B.hPutStr h . fromString | ||||
| 
 | ||||
| hPutStrLn :: Handle -> String -> IO () | ||||
| hPutStrLn h s = hPutStr h (s ++ "\n") | ||||
							
								
								
									
										114
									
								
								hledger-lib/Hledger/Utils/UTF8IOCompat.hs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										114
									
								
								hledger-lib/Hledger/Utils/UTF8IOCompat.hs
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,114 @@ | ||||
| {-# LANGUAGE CPP #-} | ||||
| {- | ||||
| 
 | ||||
| UTF-8 aware string IO functions that will work across multiple platforms | ||||
| and GHC versions. Includes code from Text.Pandoc.UTF8 ((C) 2010 John | ||||
| MacFarlane <jgm@berkeley.edu>). | ||||
| 
 | ||||
| Example usage: | ||||
| 
 | ||||
|  import Prelude hiding (readFile,writeFile,appendFile,getContents,putStr,putStrLn) | ||||
|  import UTF8IOCompat   (readFile,writeFile,appendFile,getContents,putStr,putStrLn) | ||||
|  import UTF8IOCompat   (SystemString,fromSystemString,toSystemString,error',userError') | ||||
| 
 | ||||
| -} | ||||
| 
 | ||||
| module Hledger.Utils.UTF8IOCompat ( | ||||
|   readFile, | ||||
|   writeFile, | ||||
|   appendFile, | ||||
|   getContents, | ||||
|   hGetContents, | ||||
|   putStr, | ||||
|   putStrLn, | ||||
|   hPutStr, | ||||
|   hPutStrLn, | ||||
|   -- | ||||
|   SystemString, | ||||
|   fromSystemString, | ||||
|   toSystemString, | ||||
|   error', | ||||
|   userError', | ||||
| ) | ||||
| where | ||||
| 
 | ||||
| import qualified Data.ByteString.Lazy as B | ||||
| import Data.ByteString.Lazy.UTF8 (toString, fromString) | ||||
| import Prelude hiding (readFile, writeFile, appendFile, getContents, putStr, putStrLn) | ||||
| import System.IO (Handle) | ||||
| import Control.Monad (liftM) | ||||
| #if __GLASGOW_HASKELL__ < 702 | ||||
| import Codec.Binary.UTF8.String as UTF8 (decodeString, encodeString, isUTF8Encoded) | ||||
| import System.Info (os) | ||||
| #endif | ||||
| 
 | ||||
| bom :: B.ByteString | ||||
| bom = B.pack [0xEF, 0xBB, 0xBF] | ||||
| 
 | ||||
| stripBOM :: B.ByteString -> B.ByteString | ||||
| stripBOM s | bom `B.isPrefixOf` s = B.drop 3 s | ||||
| stripBOM s = s | ||||
| 
 | ||||
| readFile :: FilePath -> IO String | ||||
| readFile = liftM (toString . stripBOM) . B.readFile | ||||
| 
 | ||||
| writeFile :: FilePath -> String -> IO () | ||||
| writeFile f = B.writeFile f . fromString | ||||
| 
 | ||||
| appendFile :: FilePath -> String -> IO () | ||||
| appendFile f = B.appendFile f . fromString | ||||
| 
 | ||||
| getContents :: IO String | ||||
| getContents = liftM (toString . stripBOM) B.getContents | ||||
| 
 | ||||
| hGetContents :: Handle -> IO String | ||||
| hGetContents h = liftM (toString . stripBOM) (B.hGetContents h) | ||||
| 
 | ||||
| putStr :: String -> IO () | ||||
| putStr = B.putStr . fromString | ||||
| 
 | ||||
| putStrLn :: String -> IO () | ||||
| putStrLn = B.putStrLn . fromString | ||||
| 
 | ||||
| hPutStr :: Handle -> String -> IO () | ||||
| hPutStr h = B.hPutStr h . fromString | ||||
| 
 | ||||
| hPutStrLn :: Handle -> String -> IO () | ||||
| hPutStrLn h s = hPutStr h (s ++ "\n") | ||||
| 
 | ||||
| -- | A string received from or being passed to the operating system, such | ||||
| -- as a file path, command-line argument, or environment variable name or | ||||
| -- value. With GHC versions before 7.2 on some platforms (posix) these are | ||||
| -- typically encoded. When converting, we assume the encoding is UTF-8 (cf | ||||
| -- http://www.dwheeler.com/essays/fixing-unix-linux-filenames.html#UTF8). | ||||
| type SystemString = String | ||||
| 
 | ||||
| -- | Convert a system string to an ordinary string, decoding from UTF-8 if | ||||
| -- it appears to be UTF8-encoded and GHC version is less than 7.2. | ||||
| fromSystemString :: SystemString -> String | ||||
| #if __GLASGOW_HASKELL__ < 702 | ||||
| fromSystemString s = if UTF8.isUTF8Encoded s then UTF8.decodeString s else s | ||||
| #else | ||||
| fromSystemString = id | ||||
| #endif | ||||
| 
 | ||||
| -- | Convert a unicode string to a system string, encoding with UTF-8 if | ||||
| -- we are on a posix platform with GHC < 7.2. | ||||
| toSystemString :: String -> SystemString | ||||
| #if __GLASGOW_HASKELL__ < 702 | ||||
| toSystemString = case os of | ||||
|                      "unix" -> UTF8.encodeString | ||||
|                      "linux" -> UTF8.encodeString | ||||
|                      "darwin" -> UTF8.encodeString | ||||
|                      _ -> id | ||||
| #else | ||||
| toSystemString = id | ||||
| #endif | ||||
| 
 | ||||
| -- | A SystemString-aware version of error. | ||||
| error' :: String -> a | ||||
| error' = error . toSystemString | ||||
| 
 | ||||
| -- | A SystemString-aware version of userError. | ||||
| userError' :: String -> IOError | ||||
| userError' = userError . toSystemString | ||||
| @ -52,7 +52,7 @@ library | ||||
|                   Hledger.Read.Utils | ||||
|                   Hledger.Reports | ||||
|                   Hledger.Utils | ||||
|                   Hledger.Utils.UTF8 | ||||
|                   Hledger.Utils.UTF8IOCompat | ||||
|   Build-Depends: | ||||
|                   base >= 3 && < 5 | ||||
|                  ,bytestring | ||||
|  | ||||
| @ -26,7 +26,7 @@ import Yesod.Logger (makeLogger) | ||||
| import Hledger | ||||
| import Hledger.Cli hiding (progname,prognameandversion) | ||||
| import Prelude hiding (putStrLn) | ||||
| import Hledger.Utils.UTF8 (putStrLn) | ||||
| import Hledger.Utils.UTF8IOCompat (putStrLn) | ||||
| import Hledger.Web | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
| @ -29,7 +29,7 @@ import qualified Data.Set as Set | ||||
| 
 | ||||
| import Hledger | ||||
| import Prelude hiding (putStr, putStrLn, appendFile) | ||||
| import Hledger.Utils.UTF8 (putStr, putStrLn, appendFile) | ||||
| import Hledger.Utils.UTF8IOCompat (putStr, putStrLn, appendFile) | ||||
| import Hledger.Cli.Options | ||||
| import Hledger.Cli.Register (postingsReportAsText) | ||||
| import Hledger.Cli.Utils | ||||
|  | ||||
| @ -105,7 +105,7 @@ import Test.HUnit | ||||
| 
 | ||||
| import Hledger | ||||
| import Prelude hiding (putStr) | ||||
| import Hledger.Utils.UTF8 (putStr) | ||||
| import Hledger.Utils.UTF8IOCompat (putStr) | ||||
| import Hledger.Data.FormatStrings | ||||
| import qualified Hledger.Data.FormatStrings as Format | ||||
| import Hledger.Cli.Options | ||||
|  | ||||
| @ -15,7 +15,7 @@ import Hledger.Cli.Options | ||||
| import Hledger.Data | ||||
| import Hledger.Reports | ||||
| import Prelude hiding (putStr) | ||||
| import Hledger.Utils.UTF8 (putStr) | ||||
| import Hledger.Utils.UTF8IOCompat (putStr) | ||||
| 
 | ||||
| 
 | ||||
| barchar = '*' | ||||
|  | ||||
| @ -363,7 +363,7 @@ getEnvSafe v = getEnv v `catch` (\_ -> return "") | ||||
| getDirectoryContentsSafe d = getDirectoryContents d `catch` (\_ -> return []) | ||||
| 
 | ||||
| -- | Convert possibly encoded option values to regular unicode strings. | ||||
| decodeRawOpts = map (\(name,val) -> (name, fromPlatformString val)) | ||||
| decodeRawOpts = map (\(name,val) -> (name, fromSystemString val)) | ||||
| 
 | ||||
| -- A hacky workaround for http://code.google.com/p/ndmitchell/issues/detail?id=470 : | ||||
| -- we'd like to permit options before COMMAND as well as after it. | ||||
|  | ||||
| @ -12,7 +12,7 @@ import Data.List | ||||
| 
 | ||||
| import Hledger | ||||
| import Prelude hiding (putStr) | ||||
| import Hledger.Utils.UTF8 (putStr) | ||||
| import Hledger.Utils.UTF8IOCompat (putStr) | ||||
| import Hledger.Cli.Options | ||||
| 
 | ||||
| -- | Print journal transactions in standard format. | ||||
|  | ||||
| @ -18,7 +18,7 @@ import Text.Printf | ||||
| 
 | ||||
| import Hledger | ||||
| import Prelude hiding (putStr) | ||||
| import Hledger.Utils.UTF8 (putStr) | ||||
| import Hledger.Utils.UTF8IOCompat (putStr) | ||||
| import Hledger.Cli.Options | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
| @ -16,7 +16,7 @@ import qualified Data.Map as Map | ||||
| import Hledger | ||||
| import Hledger.Cli.Options | ||||
| import Prelude hiding (putStr) | ||||
| import Hledger.Utils.UTF8 (putStr) | ||||
| import Hledger.Utils.UTF8IOCompat (putStr) | ||||
| 
 | ||||
| 
 | ||||
| -- like Register.summarisePostings | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user