look harder for decimal point & digit groups (fixes #196)
Amount display styles have been reworked a bit; they are now calculated after journal parsing, not during it. This allows the fix for #196: we now search through the amounts until a decimal point is detected, instead of just looking at the first one; likewise for digit groups. Digit groups are now implemented with a better type. Digit group size detection has been improved a little: 1000,000 now gives group sizes [3,4,4,...], not [3,3,...], and 10,000 gives groups sizes [3,3,...] not [3,2,2,..]. (To get [3,2,2,...] you'd use eg 00,00,000.) There are still some old (or new ?) issues; I don't think we handle inconsistent decimal points & digit groups too well. But for now all tests pass.
This commit is contained in:
		
							parent
							
								
									647d5833ff
								
							
						
					
					
						commit
						c31710d942
					
				| @ -68,7 +68,6 @@ module Hledger.Data.Amount ( | ||||
|   setAmountPrecision, | ||||
|   withPrecision, | ||||
|   canonicaliseAmount, | ||||
|   canonicalStyles, | ||||
|   -- * MixedAmount | ||||
|   nullmixedamt, | ||||
|   missingmixedamt, | ||||
| @ -99,7 +98,7 @@ module Hledger.Data.Amount ( | ||||
| import Data.Char (isDigit) | ||||
| import Data.List | ||||
| import Data.Map (findWithDefault) | ||||
| import Data.Ord (comparing) | ||||
| import Data.Maybe | ||||
| import Test.HUnit | ||||
| import Text.Printf | ||||
| import qualified Data.Map as M | ||||
| @ -111,7 +110,7 @@ import Hledger.Utils | ||||
| 
 | ||||
| deriving instance Show HistoricalPrice | ||||
| 
 | ||||
| amountstyle = AmountStyle L False 0 '.' ',' [] | ||||
| amountstyle = AmountStyle L False 0 (Just '.') Nothing | ||||
| 
 | ||||
| ------------------------------------------------------------------------------- | ||||
| -- Amount | ||||
| @ -281,8 +280,8 @@ showAmount a@(Amount{acommodity=c, aprice=p, astyle=AmountStyle{..}}) = | ||||
| -- | Get the string representation of the number part of of an amount, | ||||
| -- using the display settings from its commodity. | ||||
| showamountquantity :: Amount -> String | ||||
| showamountquantity Amount{aquantity=q, astyle=AmountStyle{asprecision=p, asdecimalpoint=d, asseparator=s, asseparatorpositions=spos}} = | ||||
|     punctuatenumber d s spos $ qstr | ||||
| showamountquantity Amount{aquantity=q, astyle=AmountStyle{asprecision=p, asdecimalpoint=mdec, asdigitgroups=mgrps}} = | ||||
|     punctuatenumber (fromMaybe '.' mdec) mgrps $ qstr | ||||
|     where | ||||
|       -- isint n = fromIntegral (round n) == n | ||||
|       qstr -- p == maxprecision && isint q = printf "%d" (round q::Integer) | ||||
| @ -293,21 +292,26 @@ showamountquantity Amount{aquantity=q, astyle=AmountStyle{asprecision=p, asdecim | ||||
| -- | Replace a number string's decimal point with the specified character, | ||||
| -- and add the specified digit group separators. The last digit group will | ||||
| -- be repeated as needed. | ||||
| punctuatenumber :: Char -> Char -> [Int] -> String -> String | ||||
| punctuatenumber dec sep grps str = sign ++ reverse (addseps sep (extend grps) (reverse int)) ++ frac'' | ||||
| punctuatenumber :: Char -> Maybe DigitGroupStyle -> String -> String | ||||
| punctuatenumber dec mgrps s = sign ++ reverse (applyDigitGroupStyle mgrps (reverse int)) ++ frac'' | ||||
|     where | ||||
|       (sign,num) = break isDigit str | ||||
|       (sign,num) = break isDigit s | ||||
|       (int,frac) = break (=='.') num | ||||
|       frac' = dropWhile (=='.') frac | ||||
|       frac'' | null frac' = "" | ||||
|              | otherwise  = dec:frac' | ||||
|       extend [] = [] | ||||
|       extend gs = init gs ++ repeat (last gs) | ||||
|       addseps _ [] str = str | ||||
|       addseps sep (g:gs) str | ||||
|           | length str <= g = str | ||||
|           | otherwise = let (s,rest) = splitAt g str | ||||
|                         in s ++ [sep] ++ addseps sep gs rest | ||||
| 
 | ||||
| applyDigitGroupStyle :: Maybe DigitGroupStyle -> String -> String | ||||
| applyDigitGroupStyle Nothing s = s | ||||
| applyDigitGroupStyle (Just (DigitGroups c gs)) s = addseps (repeatLast gs) s | ||||
|   where | ||||
|     addseps [] s = s | ||||
|     addseps (g:gs) s | ||||
|       | length s <= g = s | ||||
|       | otherwise     = let (part,rest) = splitAt g s | ||||
|                         in part ++ [c] ++ addseps gs rest | ||||
|     repeatLast [] = [] | ||||
|     repeatLast gs = init gs ++ repeat (last gs) | ||||
| 
 | ||||
| chopdotzero str = reverse $ case reverse str of | ||||
|                               '0':'.':s -> s | ||||
| @ -501,23 +505,6 @@ showMixedAmountWithoutPrice m = concat $ intersperse "\n" $ map showfixedwidth a | ||||
| canonicaliseMixedAmount :: M.Map Commodity AmountStyle -> MixedAmount -> MixedAmount | ||||
| canonicaliseMixedAmount styles (Mixed as) = Mixed $ map (canonicaliseAmount styles) as | ||||
| 
 | ||||
| -- | Given a list of amounts in parse order, build a map from commodities | ||||
| -- to canonical display styles for amounts in that commodity. | ||||
| canonicalStyles :: [Amount] -> M.Map Commodity AmountStyle | ||||
| canonicalStyles amts = M.fromList commstyles | ||||
|   where | ||||
|     samecomm = \a1 a2 -> acommodity a1 == acommodity a2 | ||||
|     commamts = [(acommodity $ head as, as) | as <- groupBy samecomm $ sortBy (comparing acommodity) amts] | ||||
|     commstyles = [(c, s) | ||||
|                  | (c,as) <- commamts | ||||
|                  , let styles = map astyle as | ||||
|                  , let maxprec = maximum $ map asprecision styles | ||||
|                  , let s = (head styles){asprecision=maxprec} | ||||
|                  ] | ||||
| 
 | ||||
| -- lookupStyle :: M.Map Commodity AmountStyle -> Commodity -> AmountStyle | ||||
| -- lookupStyle  | ||||
| 
 | ||||
| ------------------------------------------------------------------------------- | ||||
| -- misc | ||||
| 
 | ||||
|  | ||||
| @ -46,6 +46,7 @@ module Hledger.Data.Journal ( | ||||
|   journalEquityAccountQuery, | ||||
|   journalCashAccountQuery, | ||||
|   -- * Misc | ||||
|   canonicalStyles, | ||||
|   matchpats, | ||||
|   nullctx, | ||||
|   nulljournal, | ||||
| @ -481,12 +482,34 @@ journalCanonicaliseAmounts :: Journal -> Journal | ||||
| journalCanonicaliseAmounts j@Journal{jtxns=ts} = j'' | ||||
|     where | ||||
|       j'' = j'{jtxns=map fixtransaction ts} | ||||
|       j' = j{jcommoditystyles = canonicalStyles $ journalAmounts j} | ||||
|       j' = j{jcommoditystyles = canonicalStyles $ dbgAt 8 "journalAmounts" $ journalAmounts j} | ||||
|       fixtransaction t@Transaction{tpostings=ps} = t{tpostings=map fixposting ps} | ||||
|       fixposting p@Posting{pamount=a} = p{pamount=fixmixedamount a} | ||||
|       fixmixedamount (Mixed as) = Mixed $ map fixamount as | ||||
|       fixamount a@Amount{acommodity=c} = a{astyle=journalCommodityStyle j' c} | ||||
| 
 | ||||
| -- | Given a list of amounts in parse order, build a map from commodities | ||||
| -- to canonical display styles for amounts in that commodity. | ||||
| canonicalStyles :: [Amount] -> M.Map Commodity AmountStyle | ||||
| canonicalStyles amts = M.fromList commstyles | ||||
|   where | ||||
|     samecomm = \a1 a2 -> acommodity a1 == acommodity a2 | ||||
|     commamts = [(acommodity $ head as, as) | as <- groupBy samecomm $ sortBy (comparing acommodity) amts] | ||||
|     commstyles = [(c, canonicalStyleFrom $ map astyle as) | (c,as) <- commamts] | ||||
| 
 | ||||
| -- Given an ordered list of amount styles for a commodity, build a canonical style. | ||||
| canonicalStyleFrom :: [AmountStyle] -> AmountStyle | ||||
| canonicalStyleFrom [] = amountstyle | ||||
| canonicalStyleFrom ss@(first:_) = | ||||
|   first{asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps} | ||||
|   where | ||||
|     -- precision is the maximum of all precisions seen | ||||
|     prec = maximum $ map asprecision ss | ||||
|     -- find the first decimal point and the first digit group style seen, | ||||
|     -- or use defaults. | ||||
|     mdec  = Just $ headDef '.' $ catMaybes $ map asdecimalpoint ss | ||||
|     mgrps = maybe Nothing Just $ headMay $ catMaybes $ map asdigitgroups ss | ||||
| 
 | ||||
| -- | Get this journal's canonical amount style for the given commodity, or the null style. | ||||
| journalCommodityStyle :: Journal -> Commodity -> AmountStyle | ||||
| journalCommodityStyle j c = M.findWithDefault amountstyle c $ jcommoditystyles j | ||||
|  | ||||
| @ -56,11 +56,19 @@ data AmountStyle = AmountStyle { | ||||
|       ascommodityside :: Side,       -- ^ does the symbol appear on the left or the right ? | ||||
|       ascommodityspaced :: Bool,     -- ^ space between symbol and quantity ? | ||||
|       asprecision :: Int,            -- ^ number of digits displayed after the decimal point | ||||
|       asdecimalpoint :: Char,        -- ^ character used as decimal point | ||||
|       asseparator :: Char,           -- ^ character used for separating digit groups (eg thousands) | ||||
|       asseparatorpositions :: [Int]  -- ^ positions of digit group separators, counting leftward from decimal point | ||||
|       asdecimalpoint :: Maybe Char,  -- ^ character used as decimal point: period or comma. Nothing means "unspecified, use default" | ||||
|       asdigitgroups :: Maybe DigitGroupStyle -- ^ style for displaying digit groups, if any | ||||
| } deriving (Eq,Ord,Read,Show,Typeable,Data) | ||||
| 
 | ||||
| -- | A style for displaying digit groups in the integer part of a | ||||
| -- floating point number. It consists of the character used to | ||||
| -- separate groups (comma or period, whichever is not used as decimal | ||||
| -- point), and the size of each group, starting with the one nearest | ||||
| -- the decimal point. The last group size is assumed to repeat. Eg, | ||||
| -- comma between thousands is DigitGroups ',' [3]. | ||||
| data DigitGroupStyle = DigitGroups Char [Int] | ||||
|   deriving (Eq,Ord,Read,Show,Typeable,Data) | ||||
| 
 | ||||
| data Amount = Amount { | ||||
|       acommodity :: Commodity, | ||||
|       aquantity :: Quantity, | ||||
|  | ||||
| @ -662,8 +662,8 @@ leftsymbolamount = do | ||||
|   sign <- signp | ||||
|   c <- commoditysymbol  | ||||
|   sp <- many spacenonewline | ||||
|   (q,prec,dec,sep,seppos) <- numberp | ||||
|   let s = amountstyle{ascommodityside=L, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=dec, asseparator=sep, asseparatorpositions=seppos} | ||||
|   (q,prec,mdec,mgrps) <- numberp | ||||
|   let s = amountstyle{ascommodityside=L, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps} | ||||
|   p <- priceamount | ||||
|   let applysign = if sign=="-" then negate else id | ||||
|   return $ applysign $ Amount c q p s | ||||
| @ -671,23 +671,23 @@ leftsymbolamount = do | ||||
| 
 | ||||
| rightsymbolamount :: GenParser Char JournalContext Amount | ||||
| rightsymbolamount = do | ||||
|   (q,prec,dec,sep,seppos) <- numberp | ||||
|   (q,prec,mdec,mgrps) <- numberp | ||||
|   sp <- many spacenonewline | ||||
|   c <- commoditysymbol | ||||
|   p <- priceamount | ||||
|   let s = amountstyle{ascommodityside=R, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=dec, asseparator=sep, asseparatorpositions=seppos} | ||||
|   let s = amountstyle{ascommodityside=R, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps} | ||||
|   return $ Amount c q p s | ||||
|   <?> "right-symbol amount" | ||||
| 
 | ||||
| nosymbolamount :: GenParser Char JournalContext Amount | ||||
| nosymbolamount = do | ||||
|   (q,prec,dec,sep,seppos) <- numberp | ||||
|   (q,prec,mdec,mgrps) <- numberp | ||||
|   p <- priceamount | ||||
|   -- apply the most recently seen default commodity and style to this commodityless amount | ||||
|   defcs <- getDefaultCommodityAndStyle | ||||
|   let (c,s) = case defcs of | ||||
|         Just (defc,defs) -> (defc, defs{asprecision=max (asprecision defs) prec}) | ||||
|         Nothing          -> ("", amountstyle{asprecision=prec, asdecimalpoint=dec, asseparator=sep, asseparatorpositions=seppos}) | ||||
|         Nothing          -> ("", amountstyle{asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps}) | ||||
|   return $ Amount c q p s | ||||
|   <?> "no-symbol amount" | ||||
| 
 | ||||
| @ -745,54 +745,67 @@ fixedlotprice = | ||||
|           return $ Just a) | ||||
|          <|> return Nothing | ||||
| 
 | ||||
| -- | Parse a numeric quantity for its value and display attributes.  Some | ||||
| -- international number formats (cf | ||||
| -- http://en.wikipedia.org/wiki/Decimal_separator) are accepted: either | ||||
| -- period or comma may be used for the decimal point, and the other of | ||||
| -- these may be used for separating digit groups in the integer part (eg a | ||||
| -- thousands separator).  This returns the numeric value, the precision | ||||
| -- (number of digits to the right of the decimal point), the decimal point | ||||
| -- and separator characters (defaulting to . and ,), and the positions of | ||||
| -- separators (counting leftward from the decimal point, the last is | ||||
| -- assumed to repeat). | ||||
| numberp :: GenParser Char JournalContext (Quantity, Int, Char, Char, [Int]) | ||||
| -- | Parse a string representation of a number for its value and display | ||||
| -- attributes. | ||||
| --  | ||||
| -- Some international number formats are accepted, eg either period or comma | ||||
| -- may be used for the decimal point, and the other of these may be used for | ||||
| -- separating digit groups in the integer part. See | ||||
| -- http://en.wikipedia.org/wiki/Decimal_separator for more examples. | ||||
| --  | ||||
| -- This returns: the parsed numeric value, the precision (number of digits | ||||
| -- seen following the decimal point), the decimal point character used if any, | ||||
| -- and the digit group style if any. | ||||
| --  | ||||
| numberp :: GenParser Char JournalContext (Quantity, Int, Maybe Char, Maybe DigitGroupStyle) | ||||
| numberp = do | ||||
|   -- a number is an optional sign followed by a sequence of digits possibly | ||||
|   -- interspersed with periods, commas, or both | ||||
|   -- ptrace "numberp" | ||||
|   sign <- signp | ||||
|   parts <- many1 $ choice' [many1 digit, many1 $ char ',', many1 $ char '.'] | ||||
|   let numeric = isNumber . headDef '_' | ||||
|       (numparts, puncparts) = partition numeric parts | ||||
|       (ok,decimalpoint',separator') = | ||||
|   dbgAt 8 "numberp parsed" (sign,parts) `seq` return () | ||||
| 
 | ||||
|   -- check the number is well-formed and identify the decimal point and digit | ||||
|   -- group separator characters used, if any | ||||
|   let (numparts, puncparts) = partition numeric parts | ||||
|       (ok, mdecimalpoint, mseparator) = | ||||
|           case (numparts, puncparts) of | ||||
|             ([],_)     -> (False, Nothing, Nothing)  -- no digits | ||||
|             (_,[])     -> (True, Nothing, Nothing)  -- no punctuation chars | ||||
|             (_,[d:""]) -> (True, Just d, Nothing)   -- just one punctuation char, assume it's a decimal point | ||||
|             (_,[_])    -> (False, Nothing, Nothing) -- adjacent punctuation chars, not ok | ||||
|             (_,_:_:_)  -> let (s:ss, d) = (init puncparts, last puncparts) -- two or more punctuation chars | ||||
|             ([],_)     -> (False, Nothing, Nothing)  -- no digits, not ok | ||||
|             (_,[])     -> (True, Nothing, Nothing)   -- digits with no punctuation, ok | ||||
|             (_,[[d]])  -> (True, Just d, Nothing)    -- just a single punctuation of length 1, assume it's a decimal point | ||||
|             (_,[_])    -> (False, Nothing, Nothing)  -- a single punctuation of some other length, not ok | ||||
|             (_,_:_:_)  ->                                       -- two or more punctuations | ||||
|               let (s:ss, d) = (init puncparts, last puncparts)  -- the leftmost is a separator and the rightmost may be a decimal point | ||||
|               in if (any ((/=1).length) puncparts               -- adjacent punctuation chars, not ok | ||||
|                                  || any (s/=) ss                -- separator chars differ, not ok | ||||
|                      || any (s/=) ss                            -- separator chars vary, not ok | ||||
|                      || head parts == s)                        -- number begins with a separator char, not ok | ||||
|                  then (False, Nothing, Nothing) | ||||
|                  else if s == d | ||||
|                                     then (True, Nothing, Just $ head s) -- just one kind of punctuation, assume separator chars | ||||
|                                     else (True, Just $ head d, Just $ head s) -- separators and a decimal point | ||||
|                       then (True, Nothing, Just $ head s)       -- just one kind of punctuation - must be separators | ||||
|                       else (True, Just $ head d, Just $ head s) -- separator(s) and a decimal point | ||||
|   when (not ok) (fail $ "number seems ill-formed: "++concat parts) | ||||
|   let (intparts',fracparts') = span ((/= decimalpoint') . Just . head) parts | ||||
| 
 | ||||
|   -- get the digit group sizes and digit group style if any | ||||
|   let (intparts',fracparts') = span ((/= mdecimalpoint) . Just . head) parts | ||||
|       (intparts, fracpart) = (filter numeric intparts', filter numeric fracparts') | ||||
|       separatorpositions = reverse $ map length $ drop 1 intparts | ||||
|       int = concat $ "":intparts | ||||
|       groupsizes = reverse $ case map length intparts of | ||||
|                                (a:b:cs) | a < b -> b:cs | ||||
|                                gs               -> gs | ||||
|       mgrps = maybe Nothing (Just . (`DigitGroups` groupsizes)) $ mseparator | ||||
| 
 | ||||
|   -- put the parts back together without digit group separators, get the precision and parse the value | ||||
|   let int = concat $ "":intparts | ||||
|       frac = concat $ "":fracpart | ||||
|       precision = length frac | ||||
|       int' = if null int then "0" else int | ||||
|       frac' = if null frac then "0" else frac | ||||
|       quantity = read $ sign++int'++"."++frac' -- this read should never fail | ||||
|       (decimalpoint, separator) = case (decimalpoint', separator') of (Just d,  Just s)   -> (d,s) | ||||
|                                                                       (Just '.',Nothing)  -> ('.',',') | ||||
|                                                                       (Just ',',Nothing)  -> (',','.') | ||||
|                                                                       (Nothing, Just '.') -> (',','.') | ||||
|                                                                       (Nothing, Just ',') -> ('.',',') | ||||
|                                                                       _                   -> ('.',',') | ||||
|   return (quantity,precision,decimalpoint,separator,separatorpositions) | ||||
| 
 | ||||
|   return $ dbgAt 8 "numberp quantity,precision,mdecimalpoint,mgrps" (quantity,precision,mdecimalpoint,mgrps) | ||||
|   <?> "numberp" | ||||
|   where | ||||
|     numeric = isNumber . headDef '_' | ||||
|        | ||||
| #ifdef TESTS | ||||
| test_numberp = do | ||||
|  | ||||
| @ -1,5 +1,7 @@ | ||||
| # a default commodity defined with the D directive will be used for any | ||||
| # commodity-less amounts in subsequent transactions. | ||||
| # subsequent commodity-less posting amounts. The sample amount's display style | ||||
| # is also applied, and the resulting amount may end up setting the canonical | ||||
| # display style for the commodity. | ||||
| 
 | ||||
| # 1. no default commodity | ||||
| hledgerdev -f- print | ||||
| @ -54,8 +56,9 @@ D $1,000.0 | ||||
| 
 | ||||
| >>>=0 | ||||
| 
 | ||||
| # 5. as above, sets the commodity of the commodityless amount, but an | ||||
| # earlier explicit dollar amount sets the display settings for dollar | ||||
| # 5. commodity and display style applied to the second posting amount.. | ||||
| # which ends up setting the digit group style, since it's the first amount | ||||
| # with digit groups. The great precision is used. | ||||
| hledgerdev -f- print | ||||
| <<< | ||||
| D $1,000.0 | ||||
| @ -63,21 +66,6 @@ D $1,000.0 | ||||
|   (a)  $1000000.00 | ||||
|   (b)   1000000 | ||||
| >>> | ||||
| 2010/01/01 | ||||
|     (a)   $1000000.00 | ||||
|     (b)   $1000000.00 | ||||
| 
 | ||||
| >>>=0 | ||||
| 
 | ||||
| # 6. as above, but the commodityless amount is earliest, so it sets the | ||||
| # display settings for dollar. The greatest precision is preserved though. | ||||
| hledgerdev -f- print | ||||
| <<< | ||||
| D $1,000.0 | ||||
| 2010/1/1 | ||||
|   (a)   1000000 | ||||
|   (b)  $1000000.00 | ||||
| >>> | ||||
| 2010/01/01 | ||||
|     (a)  $1,000,000.00 | ||||
|     (b)  $1,000,000.00 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user