lib: refactor the raw number parser [API]

- Purpose: to reduce the verbosity of the previous implementation
- Split off `AmbiguousNumber` into its own type
- Introduce a function `AmbiguousNumber -> RawNumber` explicitly capturing the
  disambiguation logic
- Reduce the number of remaining constructors in `RawNumber` to just two,
  `WithSeparator` and `NoSeparator`
  - The choice to distinguish by the presence of digit separators is motivated
    by the need for this information later on when disallowing exponents on
    numbers with digit separators
This commit is contained in:
Alex Chen 2018-05-24 15:52:09 -06:00 committed by Simon Michael
parent 6ffa9cb3cd
commit f7fd6e6525

View File

@ -101,7 +101,7 @@ import Control.Monad.State.Strict
import Data.Bifunctor import Data.Bifunctor
import Data.Char import Data.Char
import Data.Data import Data.Data
import Data.Decimal (DecimalRaw (Decimal)) import Data.Decimal (DecimalRaw (Decimal), Decimal)
import Data.Default import Data.Default
import Data.Functor.Identity import Data.Functor.Identity
import Data.List.Compat import Data.List.Compat
@ -569,12 +569,13 @@ rightsymbolamountp :: Monad m => JournalParser m Amount
rightsymbolamountp = do rightsymbolamountp = do
m <- lift multiplierp m <- lift multiplierp
sign <- lift signp sign <- lift signp
rawnum <- lift $ rawnumberp ambiguousRawNum <- lift rawnumberp
expMod <- lift . option id $ try exponentp expMod <- lift . option id $ try exponentp
commodityspaced <- lift $ skipMany' spacenonewline commodityspaced <- lift $ skipMany' spacenonewline
c <- lift commoditysymbolp c <- lift commoditysymbolp
suggestedStyle <- getAmountStyle c suggestedStyle <- getAmountStyle c
let (q0,prec0,mdec,mgrps) = fromRawNumber suggestedStyle rawnum let (q0,prec0,mdec,mgrps) =
fromRawNumber $ either (disambiguateNumber suggestedStyle) id ambiguousRawNum
(q, prec) = expMod (sign q0, prec0) (q, prec) = expMod (sign q0, prec0)
p <- priceamountp p <- priceamountp
let s = amountstyle{ascommodityside=R, ascommodityspaced=commodityspaced, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps} let s = amountstyle{ascommodityside=R, ascommodityspaced=commodityspaced, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps}
@ -671,11 +672,11 @@ numberp suggestedStyle = do
-- interspersed with periods, commas, or both -- interspersed with periods, commas, or both
-- ptrace "numberp" -- ptrace "numberp"
sign <- signp sign <- signp
raw <- rawnumberp raw <- either (disambiguateNumber suggestedStyle) id <$> rawnumberp
dbg8 "numberp suggestedStyle" suggestedStyle `seq` return () dbg8 "numberp suggestedStyle" suggestedStyle `seq` return ()
let (q, prec, decSep, groups) = let (q, prec, decSep, groups) =
dbg8 "numberp quantity,precision,mdecimalpoint,mgrps" dbg8 "numberp quantity,precision,mdecimalpoint,mgrps"
$ fromRawNumber suggestedStyle raw $ fromRawNumber raw
mExp <- optional $ try $ exponentp mExp <- optional $ try $ exponentp
case mExp of case mExp of
Just expFunc Just expFunc
@ -692,79 +693,26 @@ exponentp = do
return $ bimap (* 10^^exp) (max 0 . subtract exp) return $ bimap (* 10^^exp) (max 0 . subtract exp)
<?> "exponentp" <?> "exponentp"
-- | Interpret a raw number as a decimal number, and identify the decimal -- | Interpret a raw number as a decimal number.
-- point charcter and digit separating scheme. There is only one ambiguous
-- case: when there is just a single separator between two digit groups.
-- Disambiguate using an amount style, if provided; otherwise, assume that
-- the separator is a decimal point.
-- --
-- Returns: -- Returns:
-- - the decimal number -- - the decimal number
-- - the precision (number of digits after the decimal point) -- - the precision (number of digits after the decimal point)
-- - the decimal point character, if any -- - the decimal point character, if any
-- - the digit group style, if any (digit group character and sizes of digit groups) -- - the digit group style, if any (digit group character and sizes of digit groups)
fromRawNumber fromRawNumber :: RawNumber -> (Quantity, Int, Maybe Char, Maybe DigitGroupStyle)
:: Maybe AmountStyle fromRawNumber raw = case raw of
-> RawNumber
-> (Quantity, Int, Maybe Char, Maybe DigitGroupStyle)
fromRawNumber suggestedStyle raw = case raw of
LeadingDecimalPt decPt digitGrp -> NoSeparators digitGrp mDecimals ->
let quantity = let decimalGrp = maybe mempty snd mDecimals
Decimal (fromIntegral precision) (digitGroupNumber digitGrp) (quantity, precision) = toDecimal digitGrp decimalGrp
precision = digitGroupLength digitGrp in (quantity, precision, fmap fst mDecimals, Nothing)
in (quantity, precision, Just decPt, Nothing)
TrailingDecimalPt digitGrp decPt -> WithSeparators digitSep digitGrps mDecimals ->
let quantity = let decimalGrp = maybe mempty snd mDecimals
Decimal (fromIntegral precision) (digitGroupNumber digitGrp) (quantity, precision) = toDecimal (mconcat digitGrps) decimalGrp
precision = 0 digitGroupStyle = DigitGroups digitSep (groupSizes digitGrps)
in (quantity, precision, Just decPt, Nothing) in (quantity, precision, fmap fst mDecimals, Just digitGroupStyle)
NoSeparators digitGrp ->
let quantity =
Decimal (fromIntegral precision) (digitGroupNumber digitGrp)
precision = 0
in (quantity, precision, Nothing, Nothing)
AmbiguousNumber digitGrp1 sep digitGrp2
-- If present, use the suggested style to disambiguate;
-- otherwise, assume that the separator is a decimal point where possible.
| isDecimalPointChar sep
&& maybe True (sep `isValidDecimalBy`) suggestedStyle ->
-- Assuming that the separator is a decimal point
let quantity =
Decimal (fromIntegral precision)
(digitGroupNumber $ digitGrp1 <> digitGrp2)
precision = digitGroupLength digitGrp2
in (quantity, precision, Just sep, Nothing)
| otherwise ->
-- Assuming that the separator is digit separator
let quantity =
Decimal (fromIntegral precision)
(digitGroupNumber $ digitGrp1 <> digitGrp2)
precision = 0
digitGroupStyle = Just $
DigitGroups sep (groupSizes $ [digitGrp1, digitGrp2])
in (quantity, precision, Nothing, digitGroupStyle)
DigitSeparators digitSep digitGrps ->
let quantity =
Decimal (fromIntegral precision)
(digitGroupNumber $ mconcat digitGrps)
precision = 0
digitGroupStyle = Just $ DigitGroups digitSep (groupSizes digitGrps)
in (quantity, precision, Nothing, digitGroupStyle)
BothSeparators digitSep digitGrps decPt decimalGrp ->
let quantity =
Decimal (fromIntegral precision)
(digitGroupNumber $ mconcat digitGrps <> decimalGrp)
precision = digitGroupLength decimalGrp
digitGroupStyle = Just $ DigitGroups digitSep (groupSizes digitGrps)
in (quantity, precision, Just decPt, digitGroupStyle)
where where
-- Outputs digit group sizes from least significant to most significant -- Outputs digit group sizes from least significant to most significant
@ -773,6 +721,23 @@ fromRawNumber suggestedStyle raw = case raw of
(a:b:cs) | a < b -> b:cs (a:b:cs) | a < b -> b:cs
gs -> gs gs -> gs
toDecimal :: DigitGrp -> DigitGrp -> (Decimal, Int)
toDecimal preDecimalGrp postDecimalGrp = (quantity, precision)
where
quantity = Decimal (fromIntegral precision)
(digitGroupNumber $ preDecimalGrp <> postDecimalGrp)
precision = digitGroupLength postDecimalGrp
disambiguateNumber :: Maybe AmountStyle -> AmbiguousNumber -> RawNumber
disambiguateNumber suggestedStyle (AmbiguousNumber grp1 sep grp2) =
-- If present, use the suggested style to disambiguate;
-- otherwise, assume that the separator is a decimal point where possible.
if isDecimalPointChar sep &&
maybe True (sep `isValidDecimalBy`) suggestedStyle
then NoSeparators grp1 (Just (sep, grp2))
else WithSeparators sep [grp1, grp2] Nothing
where
isValidDecimalBy :: Char -> AmountStyle -> Bool isValidDecimalBy :: Char -> AmountStyle -> Bool
isValidDecimalBy c = \case isValidDecimalBy c = \case
AmountStyle{asdecimalpoint = Just d} -> d == c AmountStyle{asdecimalpoint = Just d} -> d == c
@ -780,13 +745,12 @@ fromRawNumber suggestedStyle raw = case raw of
AmountStyle{asprecision = 0} -> False AmountStyle{asprecision = 0} -> False
_ -> True _ -> True
-- | Parse and interpret the structure of a number without external hints.
-- | Parse and interpret the structure of a number as far as possible -- Numbers are digit strings, possibly separated into digit groups by one
-- without external hints. Numbers are digit strings, possibly separated -- of two types of separators. (1) Numbers may optionally have a decimal
-- into digit groups by one of two types of separators. (1) Numbers may -- point, which may be either a period or comma. (2) Numbers may
-- optionally have a decimal point, which may be either a period or comma. -- optionally contain digit group separators, which must all be either a
-- (2) Numbers may optionally contain digit group separators, which must -- period, a comma, or a space.
-- all be either a period, a comma, or a space.
-- --
-- It is our task to deduce the identities of the decimal point and digit -- It is our task to deduce the identities of the decimal point and digit
-- separator characters, based on the allowed syntax. For instance, we -- separator characters, based on the allowed syntax. For instance, we
@ -794,54 +758,63 @@ fromRawNumber suggestedStyle raw = case raw of
-- must succeed all digit group separators. -- must succeed all digit group separators.
-- --
-- >>> parseTest rawnumberp "1,234,567.89" -- >>> parseTest rawnumberp "1,234,567.89"
-- BothSeparators ',' ["1","234","567"] '.' "89" -- Right (WithSeparators ',' ["1","234","567"] (Just ('.',"89")))
-- >>> parseTest rawnumberp "1,000"
-- Left (AmbiguousNumber "1" ',' "000")
-- >>> parseTest rawnumberp "1 000" -- >>> parseTest rawnumberp "1 000"
-- AmbiguousNumber "1" ' ' "000" -- Right (WithSeparators ' ' ["1","000"] Nothing)
-- --
rawnumberp :: TextParser m RawNumber rawnumberp :: TextParser m (Either AmbiguousNumber RawNumber)
rawnumberp = label "rawnumberp" $ do rawnumberp = label "rawnumberp" $ do
rawNumber <- leadingDecimalPt <|> leadingDigits rawNumber <- fmap Right leadingDecimalPt <|> leadingDigits
-- Guard against mistyped numbers -- Guard against mistyped numbers
notFollowedBy $ satisfy isDecimalPointChar <|> (char ' ' >> digitChar) notFollowedBy $ satisfy isDecimalPointChar <|> char ' ' *> digitChar
return $ dbg8 "rawnumberp" rawNumber return $ dbg8 "rawnumberp" rawNumber
where where
leadingDecimalPt :: TextParser m RawNumber leadingDecimalPt :: TextParser m RawNumber
leadingDecimalPt = leadingDecimalPt = do
LeadingDecimalPt <$> satisfy isDecimalPointChar <*> digitgroupp decPt <- satisfy isDecimalPointChar
decGrp <- digitgroupp
pure $ NoSeparators mempty (Just (decPt, decGrp))
leadingDigits :: TextParser m RawNumber leadingDigits :: TextParser m (Either AmbiguousNumber RawNumber)
leadingDigits = do leadingDigits = do
grp1 <- digitgroupp grp1 <- digitgroupp
withSeparators grp1 <|> trailingDecimalPt grp1 <|> pure (NoSeparators grp1) withSeparators grp1 <|> fmap Right (trailingDecimalPt grp1)
<|> pure (Right $ NoSeparators grp1 Nothing)
withSeparators :: DigitGrp -> TextParser m RawNumber withSeparators :: DigitGrp -> TextParser m (Either AmbiguousNumber RawNumber)
withSeparators grp1 = do withSeparators grp1 = do
(sep, grp2) <- try $ (,) <$> satisfy isDigitSeparatorChar <*> digitgroupp (sep, grp2) <- try $ (,) <$> satisfy isDigitSeparatorChar <*> digitgroupp
grps <- many $ try $ char sep *> digitgroupp grps <- many $ try $ char sep *> digitgroupp
let digitGroups = grp1 : grp2 : grps let digitGroups = grp1 : grp2 : grps
withDecimalPt sep digitGroups <|> pure (withoutDecimalPt grp1 sep grp2 grps) fmap Right (withDecimalPt sep digitGroups)
<|> pure (withoutDecimalPt grp1 sep grp2 grps)
withDecimalPt :: Char -> [DigitGrp] -> TextParser m RawNumber withDecimalPt :: Char -> [DigitGrp] -> TextParser m RawNumber
withDecimalPt digitSep digitGroups = do withDecimalPt digitSep digitGroups = do
decimalPt <- satisfy $ \c -> isDecimalPointChar c && c /= digitSep decPt <- satisfy $ \c -> isDecimalPointChar c && c /= digitSep
decimalDigitGroup <- option mempty digitgroupp decDigitGrp <- option mempty digitgroupp
pure $ BothSeparators digitSep digitGroups decimalPt decimalDigitGroup pure $ WithSeparators digitSep digitGroups (Just (decPt, decDigitGrp))
withoutDecimalPt :: DigitGrp -> Char -> DigitGrp -> [DigitGrp] -> RawNumber withoutDecimalPt
:: DigitGrp
-> Char
-> DigitGrp
-> [DigitGrp]
-> Either AmbiguousNumber RawNumber
withoutDecimalPt grp1 sep grp2 grps withoutDecimalPt grp1 sep grp2 grps
| null grps = AmbiguousNumber grp1 sep grp2 | null grps && isDecimalPointChar sep =
| otherwise = DigitSeparators sep (grp1:grp2:grps) Left $ AmbiguousNumber grp1 sep grp2
| otherwise = Right $ WithSeparators sep (grp1:grp2:grps) Nothing
trailingDecimalPt :: DigitGrp -> TextParser m RawNumber trailingDecimalPt :: DigitGrp -> TextParser m RawNumber
trailingDecimalPt grp1 = do trailingDecimalPt grp1 = do
decimalPt <- satisfy isDecimalPointChar decPt <- satisfy isDecimalPointChar
pure $ TrailingDecimalPt grp1 decimalPt pure $ NoSeparators grp1 (Just (decPt, mempty))
isDecimalPointChar :: Char -> Bool isDecimalPointChar :: Char -> Bool
@ -879,12 +852,11 @@ digitgroupp = label "digit group"
data RawNumber data RawNumber
= LeadingDecimalPt Char DigitGrp -- .50 = NoSeparators DigitGrp (Maybe (Char, DigitGrp)) -- 100 or 100. or .100 or 100.50
| TrailingDecimalPt DigitGrp Char -- 100. | WithSeparators Char [DigitGrp] (Maybe (Char, DigitGrp)) -- 1,000,000 or 1,000.50
| NoSeparators DigitGrp -- 100 deriving (Show, Eq)
| AmbiguousNumber DigitGrp Char DigitGrp -- 1,000
| DigitSeparators Char [DigitGrp] -- 1,000,000 data AmbiguousNumber = AmbiguousNumber DigitGrp Char DigitGrp -- 1,000
| BothSeparators Char [DigitGrp] Char DigitGrp -- 1,000.50
deriving (Show, Eq) deriving (Show, Eq)
-- test_numberp = do -- test_numberp = do