From 5b5e5eeaf442ada63ef967cf40363d6380e93244 Mon Sep 17 00:00:00 2001 From: Simon Michael Date: Tue, 28 Apr 2015 14:06:22 -0700 Subject: [PATCH] register: wide-character-aware layout (#242) Wide characters, eg chinese/japanese/korean characters, are typically rendered wider than latin characters. In some applications (eg gnome terminal or osx terminal) and fonts (eg monaco) they are exactly double width. This is a start at making hledger aware of this. A register report containing wide characters (in descriptions, account names, or commodity symbols) should now align its columns correctly, when viewed with a suitable font and application. --- hledger-lib/Hledger/Data/AccountName.hs | 8 +-- hledger-lib/Hledger/Utils/String.hs | 93 +++++++++++++++++++++++++ hledger/Hledger/Cli/Register.hs | 39 +++++++---- tests/nonascii/wide-char-layout.test | 31 +++++++++ 4 files changed, 153 insertions(+), 18 deletions(-) create mode 100644 tests/nonascii/wide-char-layout.test diff --git a/hledger-lib/Hledger/Data/AccountName.hs b/hledger-lib/Hledger/Data/AccountName.hs index ec25ed133..108a69867 100644 --- a/hledger-lib/Hledger/Data/AccountName.hs +++ b/hledger-lib/Hledger/Data/AccountName.hs @@ -118,17 +118,17 @@ elideAccountName width s names = splitOn ", " $ take (length s - 8) s widthpername = (max 0 (width - 8 - 2 * (max 1 (length names) - 1))) `div` length names in - elideLeft width $ + elideLeftWidth width False $ (++" (split)") $ intercalate ", " $ [accountNameFromComponents $ elideparts widthpername [] $ accountNameComponents s' | s' <- names] | otherwise = - elideLeft width $ accountNameFromComponents $ elideparts width [] $ accountNameComponents s + elideLeftWidth width False $ accountNameFromComponents $ elideparts width [] $ accountNameComponents s where elideparts :: Int -> [String] -> [String] -> [String] elideparts width done ss - | length (accountNameFromComponents $ done++ss) <= width = done++ss - | length ss > 1 = elideparts width (done++[take 2 $ head ss]) (tail ss) + | strWidth (accountNameFromComponents $ done++ss) <= width = done++ss + | length ss > 1 = elideparts width (done++[takeWidth 2 $ head ss]) (tail ss) | otherwise = done++ss -- | Keep only the first n components of an account name, where n diff --git a/hledger-lib/Hledger/Utils/String.hs b/hledger-lib/Hledger/Utils/String.hs index 41382e7fc..003b8fbba 100644 --- a/hledger-lib/Hledger/Utils/String.hs +++ b/hledger-lib/Hledger/Utils/String.hs @@ -1,3 +1,5 @@ +-- | String formatting helpers, starting to get a bit out of control. + module Hledger.Utils.String ( -- * misc lowercase, @@ -27,6 +29,11 @@ module Hledger.Utils.String ( elideLeft, elideRight, formatString, + -- * wide-character-aware single-line layout + strWidth, + takeWidth, + elideLeftWidth, + elideRightWidth, -- * multi-line layout concatTopPadded, concatBottomPadded, @@ -251,3 +258,89 @@ fitto w h s = intercalate "\n" $ take h $ rows ++ repeat blankline fit w = take w . (++ repeat ' ') blankline = replicate w ' ' +-- Functions below are aware of double-width characters eg in CJK text. + +-- | Wide-character-aware string clipping to the specified width, with an ellipsis on the right. +-- When the second argument is true, also right-pad with spaces to the specified width if needed. +elideLeftWidth :: Int -> Bool -> String -> String +elideLeftWidth width pad s + | strWidth s > width = ellipsis ++ reverse (takeWidth (width - length ellipsis) $ reverse s) + | otherwise = reverse (takeWidth width $ reverse s ++ padding) + where + ellipsis = ".." + padding = if pad then repeat ' ' else "" + +-- | Wide-character-aware string clipping to the specified width, with an ellipsis on the left. +-- When the second argument is true, also left-pad with spaces to the specified width if needed. +elideRightWidth :: Int -> Bool -> String -> String +elideRightWidth width pad s + | strWidth s > width = takeWidth (width - length ellipsis) s ++ ellipsis + | otherwise = takeWidth width $ s ++ padding + where + ellipsis = ".." + padding = if pad then repeat ' ' else "" + +-- | Double-width-character-aware string truncation. Take as many +-- characters as possible from a string without exceeding the +-- specified width. Eg takeWidth 3 "りんご" = "り". +takeWidth :: Int -> String -> String +takeWidth _ "" = "" +takeWidth 0 _ = "" +takeWidth w (c:cs) | cw <= w = c:takeWidth (w-cw) cs + | otherwise = "" + where cw = charWidth c + +-- from Pandoc (copyright John MacFarlane, GPL) +-- see also http://unicode.org/reports/tr11/#Description + +-- | Get real length of string, taking into account combining and +-- double-width characters. +strWidth :: String -> Int +strWidth = foldr (\a b -> charWidth a + b) 0 + +-- | Returns the width of a character in a monospace font: 0 for a +-- combining character, 1 for a regular character, 2 for an East Asian +-- wide character. +charWidth :: Char -> Int +charWidth c = + case c of + _ | c < '\x0300' -> 1 + | c >= '\x0300' && c <= '\x036F' -> 0 -- combining + | c >= '\x0370' && c <= '\x10FC' -> 1 + | c >= '\x1100' && c <= '\x115F' -> 2 + | c >= '\x1160' && c <= '\x11A2' -> 1 + | c >= '\x11A3' && c <= '\x11A7' -> 2 + | c >= '\x11A8' && c <= '\x11F9' -> 1 + | c >= '\x11FA' && c <= '\x11FF' -> 2 + | c >= '\x1200' && c <= '\x2328' -> 1 + | c >= '\x2329' && c <= '\x232A' -> 2 + | c >= '\x232B' && c <= '\x2E31' -> 1 + | c >= '\x2E80' && c <= '\x303E' -> 2 + | c == '\x303F' -> 1 + | c >= '\x3041' && c <= '\x3247' -> 2 + | c >= '\x3248' && c <= '\x324F' -> 1 -- ambiguous + | c >= '\x3250' && c <= '\x4DBF' -> 2 + | c >= '\x4DC0' && c <= '\x4DFF' -> 1 + | c >= '\x4E00' && c <= '\xA4C6' -> 2 + | c >= '\xA4D0' && c <= '\xA95F' -> 1 + | c >= '\xA960' && c <= '\xA97C' -> 2 + | c >= '\xA980' && c <= '\xABF9' -> 1 + | c >= '\xAC00' && c <= '\xD7FB' -> 2 + | c >= '\xD800' && c <= '\xDFFF' -> 1 + | c >= '\xE000' && c <= '\xF8FF' -> 1 -- ambiguous + | c >= '\xF900' && c <= '\xFAFF' -> 2 + | c >= '\xFB00' && c <= '\xFDFD' -> 1 + | c >= '\xFE00' && c <= '\xFE0F' -> 1 -- ambiguous + | c >= '\xFE10' && c <= '\xFE19' -> 2 + | c >= '\xFE20' && c <= '\xFE26' -> 1 + | c >= '\xFE30' && c <= '\xFE6B' -> 2 + | c >= '\xFE70' && c <= '\xFEFF' -> 1 + | c >= '\xFF01' && c <= '\xFF60' -> 2 + | c >= '\xFF61' && c <= '\x16A38' -> 1 + | c >= '\x1B000' && c <= '\x1B001' -> 2 + | c >= '\x1D000' && c <= '\x1F1FF' -> 1 + | c >= '\x1F200' && c <= '\x1F251' -> 2 + | c >= '\x1F300' && c <= '\x1F773' -> 1 + | c >= '\x20000' && c <= '\x3FFFD' -> 2 + | otherwise -> 1 + diff --git a/hledger/Hledger/Cli/Register.hs b/hledger/Hledger/Cli/Register.hs index 411d48e19..ba21bcaed 100644 --- a/hledger/Hledger/Cli/Register.hs +++ b/hledger/Hledger/Cli/Register.hs @@ -19,7 +19,6 @@ import Data.Maybe import System.Console.CmdArgs.Explicit import Text.CSV import Test.HUnit -import Text.Printf import Hledger import Hledger.Cli.CliOptions @@ -119,14 +118,29 @@ tests_postingsReportAsText = [ -- postingsReportItemAsText :: CliOpts -> PostingsReportItem -> String postingsReportItemAsText opts (mdate, menddate, mdesc, p, b) = + -- use elide*Width to be wide-char-aware intercalate "\n" $ - [printf ("%-"++datew++"s %-"++descw++"s %-"++acctw++"s %"++amtw++"s %"++balw++"s") - date desc acct amtfirstline balfirstline] + [concat [elideRightWidth datewidth True date + ," " + ,elideRightWidth descwidth True desc + ," " + ,elideRightWidth acctwidth True acct + ," " + ,elideLeftWidth amtwidth True amtfirstline + ," " + ,elideLeftWidth balwidth True balfirstline + ]] ++ - [printf (spacer ++ "%"++amtw++"s %"++balw++"s") a b | (a,b) <- zip amtrest balrest ] - + [concat [spacer + ,elideLeftWidth amtwidth True a + ," " + ,elideLeftWidth balwidth True b + ] + | (a,b) <- zip amtrest balrest + ] where -- calculate widths + -- XXX should be smarter, eg resize amount columns when needed; cf hledger-ui (totalwidth,mdescwidth) = registerWidthsFromOpts opts amtwidth = 12 balwidth = 12 @@ -142,16 +156,16 @@ postingsReportItemAsText opts (mdate, menddate, mdesc, p, b) = where hasinterval = isJust menddate w = fromMaybe ((remaining - 2) `div` 2) mdescwidth - [datew,descw,acctw,amtw,balw] = map show [datewidth,descwidth,acctwidth,amtwidth,balwidth] -- gather content - desc = maybe "" (take descwidth . elideRight descwidth) mdesc + desc = fromMaybe "" mdesc acct = parenthesise $ elideAccountName awidth $ paccount p where - (parenthesise, awidth) = case ptype p of - BalancedVirtualPosting -> (\s -> "["++s++"]", acctwidth-2) - VirtualPosting -> (\s -> "("++s++")", acctwidth-2) - _ -> (id,acctwidth) + (parenthesise, awidth) = + case ptype p of + BalancedVirtualPosting -> (\s -> "["++s++"]", acctwidth-2) + VirtualPosting -> (\s -> "("++s++")", acctwidth-2) + _ -> (id,acctwidth) amt = showMixedAmountWithoutPrice $ pamount p bal = showMixedAmountWithoutPrice b -- alternate behaviour, show null amounts as 0 instead of blank @@ -164,9 +178,6 @@ postingsReportItemAsText opts (mdate, menddate, mdesc, p, b) = (balfirstline:balrest) = take numlines $ replicate (numlines - ballen) "" ++ ballines -- balance amount is bottom-aligned spacer = replicate (totalwidth - (amtwidth + 2 + balwidth)) ' ' --- XXX --- showPostingWithBalanceForVty showtxninfo p b = postingsReportItemAsText defreportopts $ mkpostingsReportItem showtxninfo p b - tests_Hledger_Cli_Register :: Test tests_Hledger_Cli_Register = TestList tests_postingsReportAsText diff --git a/tests/nonascii/wide-char-layout.test b/tests/nonascii/wide-char-layout.test new file mode 100644 index 000000000..8cb259401 --- /dev/null +++ b/tests/nonascii/wide-char-layout.test @@ -0,0 +1,31 @@ +# alignment calculations should handle wide characters + +# 1. register, account name +hledger -f - register +<<< +1/1 + 知 1 + b +>>> +2015/01/01 知 1 1 + b -1 0 +>>>=0 + +# # 2. balance, commodity symbol +# hledger -f - balance +# <<< +# 1/1 +# a 知1 +# b $-1 +# >>> +# 知1 a +# $-1 b +# -------------------- +# $-1 +# 知1 +# >>>=0 + +# import Text.Data.ICU.Char +# case property EastAsianWidth c of +# Wide -> 2 +# _ -> 1