From c31710d942b6447461846adc7f83e64943fde8eb Mon Sep 17 00:00:00 2001 From: Simon Michael Date: Wed, 2 Jul 2014 23:26:16 -0700 Subject: [PATCH] look harder for decimal point & digit groups (fixes #196) Amount display styles have been reworked a bit; they are now calculated after journal parsing, not during it. This allows the fix for #196: we now search through the amounts until a decimal point is detected, instead of just looking at the first one; likewise for digit groups. Digit groups are now implemented with a better type. Digit group size detection has been improved a little: 1000,000 now gives group sizes [3,4,4,...], not [3,3,...], and 10,000 gives groups sizes [3,3,...] not [3,2,2,..]. (To get [3,2,2,...] you'd use eg 00,00,000.) There are still some old (or new ?) issues; I don't think we handle inconsistent decimal points & digit groups too well. But for now all tests pass. --- hledger-lib/Hledger/Data/Amount.hs | 61 +++++-------- hledger-lib/Hledger/Data/Journal.hs | 25 +++++- hledger-lib/Hledger/Data/Types.hs | 14 ++- hledger-lib/Hledger/Read/JournalReader.hs | 101 ++++++++++++---------- tests/journal/default-commodity.test | 24 ++--- 5 files changed, 122 insertions(+), 103 deletions(-) diff --git a/hledger-lib/Hledger/Data/Amount.hs b/hledger-lib/Hledger/Data/Amount.hs index 0ccac4e7c..f6194bc2c 100644 --- a/hledger-lib/Hledger/Data/Amount.hs +++ b/hledger-lib/Hledger/Data/Amount.hs @@ -68,7 +68,6 @@ module Hledger.Data.Amount ( setAmountPrecision, withPrecision, canonicaliseAmount, - canonicalStyles, -- * MixedAmount nullmixedamt, missingmixedamt, @@ -99,7 +98,7 @@ module Hledger.Data.Amount ( import Data.Char (isDigit) import Data.List import Data.Map (findWithDefault) -import Data.Ord (comparing) +import Data.Maybe import Test.HUnit import Text.Printf import qualified Data.Map as M @@ -111,7 +110,7 @@ import Hledger.Utils deriving instance Show HistoricalPrice -amountstyle = AmountStyle L False 0 '.' ',' [] +amountstyle = AmountStyle L False 0 (Just '.') Nothing ------------------------------------------------------------------------------- -- Amount @@ -281,33 +280,38 @@ showAmount a@(Amount{acommodity=c, aprice=p, astyle=AmountStyle{..}}) = -- | Get the string representation of the number part of of an amount, -- using the display settings from its commodity. showamountquantity :: Amount -> String -showamountquantity Amount{aquantity=q, astyle=AmountStyle{asprecision=p, asdecimalpoint=d, asseparator=s, asseparatorpositions=spos}} = - punctuatenumber d s spos $ qstr +showamountquantity Amount{aquantity=q, astyle=AmountStyle{asprecision=p, asdecimalpoint=mdec, asdigitgroups=mgrps}} = + punctuatenumber (fromMaybe '.' mdec) mgrps $ qstr where - -- isint n = fromIntegral (round n) == n - qstr -- p == maxprecision && isint q = printf "%d" (round q::Integer) - | p == maxprecisionwithpoint = printf "%f" q - | p == maxprecision = chopdotzero $ printf "%f" q - | otherwise = printf ("%."++show p++"f") q + -- isint n = fromIntegral (round n) == n + qstr -- p == maxprecision && isint q = printf "%d" (round q::Integer) + | p == maxprecisionwithpoint = printf "%f" q + | p == maxprecision = chopdotzero $ printf "%f" q + | otherwise = printf ("%."++show p++"f") q -- | Replace a number string's decimal point with the specified character, -- and add the specified digit group separators. The last digit group will -- be repeated as needed. -punctuatenumber :: Char -> Char -> [Int] -> String -> String -punctuatenumber dec sep grps str = sign ++ reverse (addseps sep (extend grps) (reverse int)) ++ frac'' +punctuatenumber :: Char -> Maybe DigitGroupStyle -> String -> String +punctuatenumber dec mgrps s = sign ++ reverse (applyDigitGroupStyle mgrps (reverse int)) ++ frac'' where - (sign,num) = break isDigit str + (sign,num) = break isDigit s (int,frac) = break (=='.') num frac' = dropWhile (=='.') frac frac'' | null frac' = "" | otherwise = dec:frac' - extend [] = [] - extend gs = init gs ++ repeat (last gs) - addseps _ [] str = str - addseps sep (g:gs) str - | length str <= g = str - | otherwise = let (s,rest) = splitAt g str - in s ++ [sep] ++ addseps sep gs rest + +applyDigitGroupStyle :: Maybe DigitGroupStyle -> String -> String +applyDigitGroupStyle Nothing s = s +applyDigitGroupStyle (Just (DigitGroups c gs)) s = addseps (repeatLast gs) s + where + addseps [] s = s + addseps (g:gs) s + | length s <= g = s + | otherwise = let (part,rest) = splitAt g s + in part ++ [c] ++ addseps gs rest + repeatLast [] = [] + repeatLast gs = init gs ++ repeat (last gs) chopdotzero str = reverse $ case reverse str of '0':'.':s -> s @@ -501,23 +505,6 @@ showMixedAmountWithoutPrice m = concat $ intersperse "\n" $ map showfixedwidth a canonicaliseMixedAmount :: M.Map Commodity AmountStyle -> MixedAmount -> MixedAmount canonicaliseMixedAmount styles (Mixed as) = Mixed $ map (canonicaliseAmount styles) as --- | Given a list of amounts in parse order, build a map from commodities --- to canonical display styles for amounts in that commodity. -canonicalStyles :: [Amount] -> M.Map Commodity AmountStyle -canonicalStyles amts = M.fromList commstyles - where - samecomm = \a1 a2 -> acommodity a1 == acommodity a2 - commamts = [(acommodity $ head as, as) | as <- groupBy samecomm $ sortBy (comparing acommodity) amts] - commstyles = [(c, s) - | (c,as) <- commamts - , let styles = map astyle as - , let maxprec = maximum $ map asprecision styles - , let s = (head styles){asprecision=maxprec} - ] - --- lookupStyle :: M.Map Commodity AmountStyle -> Commodity -> AmountStyle --- lookupStyle - ------------------------------------------------------------------------------- -- misc diff --git a/hledger-lib/Hledger/Data/Journal.hs b/hledger-lib/Hledger/Data/Journal.hs index 5333f23c9..14696693e 100644 --- a/hledger-lib/Hledger/Data/Journal.hs +++ b/hledger-lib/Hledger/Data/Journal.hs @@ -46,6 +46,7 @@ module Hledger.Data.Journal ( journalEquityAccountQuery, journalCashAccountQuery, -- * Misc + canonicalStyles, matchpats, nullctx, nulljournal, @@ -481,12 +482,34 @@ journalCanonicaliseAmounts :: Journal -> Journal journalCanonicaliseAmounts j@Journal{jtxns=ts} = j'' where j'' = j'{jtxns=map fixtransaction ts} - j' = j{jcommoditystyles = canonicalStyles $ journalAmounts j} + j' = j{jcommoditystyles = canonicalStyles $ dbgAt 8 "journalAmounts" $ journalAmounts j} fixtransaction t@Transaction{tpostings=ps} = t{tpostings=map fixposting ps} fixposting p@Posting{pamount=a} = p{pamount=fixmixedamount a} fixmixedamount (Mixed as) = Mixed $ map fixamount as fixamount a@Amount{acommodity=c} = a{astyle=journalCommodityStyle j' c} +-- | Given a list of amounts in parse order, build a map from commodities +-- to canonical display styles for amounts in that commodity. +canonicalStyles :: [Amount] -> M.Map Commodity AmountStyle +canonicalStyles amts = M.fromList commstyles + where + samecomm = \a1 a2 -> acommodity a1 == acommodity a2 + commamts = [(acommodity $ head as, as) | as <- groupBy samecomm $ sortBy (comparing acommodity) amts] + commstyles = [(c, canonicalStyleFrom $ map astyle as) | (c,as) <- commamts] + +-- Given an ordered list of amount styles for a commodity, build a canonical style. +canonicalStyleFrom :: [AmountStyle] -> AmountStyle +canonicalStyleFrom [] = amountstyle +canonicalStyleFrom ss@(first:_) = + first{asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps} + where + -- precision is the maximum of all precisions seen + prec = maximum $ map asprecision ss + -- find the first decimal point and the first digit group style seen, + -- or use defaults. + mdec = Just $ headDef '.' $ catMaybes $ map asdecimalpoint ss + mgrps = maybe Nothing Just $ headMay $ catMaybes $ map asdigitgroups ss + -- | Get this journal's canonical amount style for the given commodity, or the null style. journalCommodityStyle :: Journal -> Commodity -> AmountStyle journalCommodityStyle j c = M.findWithDefault amountstyle c $ jcommoditystyles j diff --git a/hledger-lib/Hledger/Data/Types.hs b/hledger-lib/Hledger/Data/Types.hs index 9a7624ff7..d3f0f9cc7 100644 --- a/hledger-lib/Hledger/Data/Types.hs +++ b/hledger-lib/Hledger/Data/Types.hs @@ -56,11 +56,19 @@ data AmountStyle = AmountStyle { ascommodityside :: Side, -- ^ does the symbol appear on the left or the right ? ascommodityspaced :: Bool, -- ^ space between symbol and quantity ? asprecision :: Int, -- ^ number of digits displayed after the decimal point - asdecimalpoint :: Char, -- ^ character used as decimal point - asseparator :: Char, -- ^ character used for separating digit groups (eg thousands) - asseparatorpositions :: [Int] -- ^ positions of digit group separators, counting leftward from decimal point + asdecimalpoint :: Maybe Char, -- ^ character used as decimal point: period or comma. Nothing means "unspecified, use default" + asdigitgroups :: Maybe DigitGroupStyle -- ^ style for displaying digit groups, if any } deriving (Eq,Ord,Read,Show,Typeable,Data) +-- | A style for displaying digit groups in the integer part of a +-- floating point number. It consists of the character used to +-- separate groups (comma or period, whichever is not used as decimal +-- point), and the size of each group, starting with the one nearest +-- the decimal point. The last group size is assumed to repeat. Eg, +-- comma between thousands is DigitGroups ',' [3]. +data DigitGroupStyle = DigitGroups Char [Int] + deriving (Eq,Ord,Read,Show,Typeable,Data) + data Amount = Amount { acommodity :: Commodity, aquantity :: Quantity, diff --git a/hledger-lib/Hledger/Read/JournalReader.hs b/hledger-lib/Hledger/Read/JournalReader.hs index 16c7bf3ba..1e0cc2d91 100644 --- a/hledger-lib/Hledger/Read/JournalReader.hs +++ b/hledger-lib/Hledger/Read/JournalReader.hs @@ -662,8 +662,8 @@ leftsymbolamount = do sign <- signp c <- commoditysymbol sp <- many spacenonewline - (q,prec,dec,sep,seppos) <- numberp - let s = amountstyle{ascommodityside=L, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=dec, asseparator=sep, asseparatorpositions=seppos} + (q,prec,mdec,mgrps) <- numberp + let s = amountstyle{ascommodityside=L, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps} p <- priceamount let applysign = if sign=="-" then negate else id return $ applysign $ Amount c q p s @@ -671,23 +671,23 @@ leftsymbolamount = do rightsymbolamount :: GenParser Char JournalContext Amount rightsymbolamount = do - (q,prec,dec,sep,seppos) <- numberp + (q,prec,mdec,mgrps) <- numberp sp <- many spacenonewline c <- commoditysymbol p <- priceamount - let s = amountstyle{ascommodityside=R, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=dec, asseparator=sep, asseparatorpositions=seppos} + let s = amountstyle{ascommodityside=R, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps} return $ Amount c q p s "right-symbol amount" nosymbolamount :: GenParser Char JournalContext Amount nosymbolamount = do - (q,prec,dec,sep,seppos) <- numberp + (q,prec,mdec,mgrps) <- numberp p <- priceamount -- apply the most recently seen default commodity and style to this commodityless amount defcs <- getDefaultCommodityAndStyle let (c,s) = case defcs of Just (defc,defs) -> (defc, defs{asprecision=max (asprecision defs) prec}) - Nothing -> ("", amountstyle{asprecision=prec, asdecimalpoint=dec, asseparator=sep, asseparatorpositions=seppos}) + Nothing -> ("", amountstyle{asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps}) return $ Amount c q p s "no-symbol amount" @@ -745,55 +745,68 @@ fixedlotprice = return $ Just a) <|> return Nothing --- | Parse a numeric quantity for its value and display attributes. Some --- international number formats (cf --- http://en.wikipedia.org/wiki/Decimal_separator) are accepted: either --- period or comma may be used for the decimal point, and the other of --- these may be used for separating digit groups in the integer part (eg a --- thousands separator). This returns the numeric value, the precision --- (number of digits to the right of the decimal point), the decimal point --- and separator characters (defaulting to . and ,), and the positions of --- separators (counting leftward from the decimal point, the last is --- assumed to repeat). -numberp :: GenParser Char JournalContext (Quantity, Int, Char, Char, [Int]) +-- | Parse a string representation of a number for its value and display +-- attributes. +-- +-- Some international number formats are accepted, eg either period or comma +-- may be used for the decimal point, and the other of these may be used for +-- separating digit groups in the integer part. See +-- http://en.wikipedia.org/wiki/Decimal_separator for more examples. +-- +-- This returns: the parsed numeric value, the precision (number of digits +-- seen following the decimal point), the decimal point character used if any, +-- and the digit group style if any. +-- +numberp :: GenParser Char JournalContext (Quantity, Int, Maybe Char, Maybe DigitGroupStyle) numberp = do + -- a number is an optional sign followed by a sequence of digits possibly + -- interspersed with periods, commas, or both + -- ptrace "numberp" sign <- signp parts <- many1 $ choice' [many1 digit, many1 $ char ',', many1 $ char '.'] - let numeric = isNumber . headDef '_' - (numparts, puncparts) = partition numeric parts - (ok,decimalpoint',separator') = - case (numparts,puncparts) of - ([],_) -> (False, Nothing, Nothing) -- no digits - (_,[]) -> (True, Nothing, Nothing) -- no punctuation chars - (_,[d:""]) -> (True, Just d, Nothing) -- just one punctuation char, assume it's a decimal point - (_,[_]) -> (False, Nothing, Nothing) -- adjacent punctuation chars, not ok - (_,_:_:_) -> let (s:ss, d) = (init puncparts, last puncparts) -- two or more punctuation chars - in if (any ((/=1).length) puncparts -- adjacent punctuation chars, not ok - || any (s/=) ss -- separator chars differ, not ok - || head parts == s) -- number begins with a separator char, not ok - then (False, Nothing, Nothing) - else if s == d - then (True, Nothing, Just $ head s) -- just one kind of punctuation, assume separator chars - else (True, Just $ head d, Just $ head s) -- separators and a decimal point + dbgAt 8 "numberp parsed" (sign,parts) `seq` return () + + -- check the number is well-formed and identify the decimal point and digit + -- group separator characters used, if any + let (numparts, puncparts) = partition numeric parts + (ok, mdecimalpoint, mseparator) = + case (numparts, puncparts) of + ([],_) -> (False, Nothing, Nothing) -- no digits, not ok + (_,[]) -> (True, Nothing, Nothing) -- digits with no punctuation, ok + (_,[[d]]) -> (True, Just d, Nothing) -- just a single punctuation of length 1, assume it's a decimal point + (_,[_]) -> (False, Nothing, Nothing) -- a single punctuation of some other length, not ok + (_,_:_:_) -> -- two or more punctuations + let (s:ss, d) = (init puncparts, last puncparts) -- the leftmost is a separator and the rightmost may be a decimal point + in if (any ((/=1).length) puncparts -- adjacent punctuation chars, not ok + || any (s/=) ss -- separator chars vary, not ok + || head parts == s) -- number begins with a separator char, not ok + then (False, Nothing, Nothing) + else if s == d + then (True, Nothing, Just $ head s) -- just one kind of punctuation - must be separators + else (True, Just $ head d, Just $ head s) -- separator(s) and a decimal point when (not ok) (fail $ "number seems ill-formed: "++concat parts) - let (intparts',fracparts') = span ((/= decimalpoint') . Just . head) parts + + -- get the digit group sizes and digit group style if any + let (intparts',fracparts') = span ((/= mdecimalpoint) . Just . head) parts (intparts, fracpart) = (filter numeric intparts', filter numeric fracparts') - separatorpositions = reverse $ map length $ drop 1 intparts - int = concat $ "":intparts + groupsizes = reverse $ case map length intparts of + (a:b:cs) | a < b -> b:cs + gs -> gs + mgrps = maybe Nothing (Just . (`DigitGroups` groupsizes)) $ mseparator + + -- put the parts back together without digit group separators, get the precision and parse the value + let int = concat $ "":intparts frac = concat $ "":fracpart precision = length frac int' = if null int then "0" else int frac' = if null frac then "0" else frac quantity = read $ sign++int'++"."++frac' -- this read should never fail - (decimalpoint, separator) = case (decimalpoint', separator') of (Just d, Just s) -> (d,s) - (Just '.',Nothing) -> ('.',',') - (Just ',',Nothing) -> (',','.') - (Nothing, Just '.') -> (',','.') - (Nothing, Just ',') -> ('.',',') - _ -> ('.',',') - return (quantity,precision,decimalpoint,separator,separatorpositions) - "numberp" + return $ dbgAt 8 "numberp quantity,precision,mdecimalpoint,mgrps" (quantity,precision,mdecimalpoint,mgrps) + "numberp" + where + numeric = isNumber . headDef '_' + #ifdef TESTS test_numberp = do let s `is` n = assertParseEqual' (parseWithCtx nullctx numberp s) n diff --git a/tests/journal/default-commodity.test b/tests/journal/default-commodity.test index 9bb258661..e22ab8016 100644 --- a/tests/journal/default-commodity.test +++ b/tests/journal/default-commodity.test @@ -1,5 +1,7 @@ # a default commodity defined with the D directive will be used for any -# commodity-less amounts in subsequent transactions. +# subsequent commodity-less posting amounts. The sample amount's display style +# is also applied, and the resulting amount may end up setting the canonical +# display style for the commodity. # 1. no default commodity hledgerdev -f- print @@ -54,8 +56,9 @@ D $1,000.0 >>>=0 -# 5. as above, sets the commodity of the commodityless amount, but an -# earlier explicit dollar amount sets the display settings for dollar +# 5. commodity and display style applied to the second posting amount.. +# which ends up setting the digit group style, since it's the first amount +# with digit groups. The great precision is used. hledgerdev -f- print <<< D $1,000.0 @@ -63,21 +66,6 @@ D $1,000.0 (a) $1000000.00 (b) 1000000 >>> -2010/01/01 - (a) $1000000.00 - (b) $1000000.00 - ->>>=0 - -# 6. as above, but the commodityless amount is earliest, so it sets the -# display settings for dollar. The greatest precision is preserved though. -hledgerdev -f- print -<<< -D $1,000.0 -2010/1/1 - (a) 1000000 - (b) $1000000.00 ->>> 2010/01/01 (a) $1,000,000.00 (b) $1,000,000.00