lib: refactor the raw number parser [API]
- Purpose: to reduce the verbosity of the previous implementation
- Split off `AmbiguousNumber` into its own type
- Introduce a function `AmbiguousNumber -> RawNumber` explicitly capturing the
disambiguation logic
- Reduce the number of remaining constructors in `RawNumber` to just two,
`WithSeparator` and `NoSeparator`
- The choice to distinguish by the presence of digit separators is motivated
by the need for this information later on when disallowing exponents on
numbers with digit separators
This commit is contained in:
parent
6ffa9cb3cd
commit
f7fd6e6525
@ -101,7 +101,7 @@ import Control.Monad.State.Strict
|
|||||||
import Data.Bifunctor
|
import Data.Bifunctor
|
||||||
import Data.Char
|
import Data.Char
|
||||||
import Data.Data
|
import Data.Data
|
||||||
import Data.Decimal (DecimalRaw (Decimal))
|
import Data.Decimal (DecimalRaw (Decimal), Decimal)
|
||||||
import Data.Default
|
import Data.Default
|
||||||
import Data.Functor.Identity
|
import Data.Functor.Identity
|
||||||
import Data.List.Compat
|
import Data.List.Compat
|
||||||
@ -569,12 +569,13 @@ rightsymbolamountp :: Monad m => JournalParser m Amount
|
|||||||
rightsymbolamountp = do
|
rightsymbolamountp = do
|
||||||
m <- lift multiplierp
|
m <- lift multiplierp
|
||||||
sign <- lift signp
|
sign <- lift signp
|
||||||
rawnum <- lift $ rawnumberp
|
ambiguousRawNum <- lift rawnumberp
|
||||||
expMod <- lift . option id $ try exponentp
|
expMod <- lift . option id $ try exponentp
|
||||||
commodityspaced <- lift $ skipMany' spacenonewline
|
commodityspaced <- lift $ skipMany' spacenonewline
|
||||||
c <- lift commoditysymbolp
|
c <- lift commoditysymbolp
|
||||||
suggestedStyle <- getAmountStyle c
|
suggestedStyle <- getAmountStyle c
|
||||||
let (q0,prec0,mdec,mgrps) = fromRawNumber suggestedStyle rawnum
|
let (q0,prec0,mdec,mgrps) =
|
||||||
|
fromRawNumber $ either (disambiguateNumber suggestedStyle) id ambiguousRawNum
|
||||||
(q, prec) = expMod (sign q0, prec0)
|
(q, prec) = expMod (sign q0, prec0)
|
||||||
p <- priceamountp
|
p <- priceamountp
|
||||||
let s = amountstyle{ascommodityside=R, ascommodityspaced=commodityspaced, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps}
|
let s = amountstyle{ascommodityside=R, ascommodityspaced=commodityspaced, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps}
|
||||||
@ -671,11 +672,11 @@ numberp suggestedStyle = do
|
|||||||
-- interspersed with periods, commas, or both
|
-- interspersed with periods, commas, or both
|
||||||
-- ptrace "numberp"
|
-- ptrace "numberp"
|
||||||
sign <- signp
|
sign <- signp
|
||||||
raw <- rawnumberp
|
raw <- either (disambiguateNumber suggestedStyle) id <$> rawnumberp
|
||||||
dbg8 "numberp suggestedStyle" suggestedStyle `seq` return ()
|
dbg8 "numberp suggestedStyle" suggestedStyle `seq` return ()
|
||||||
let (q, prec, decSep, groups) =
|
let (q, prec, decSep, groups) =
|
||||||
dbg8 "numberp quantity,precision,mdecimalpoint,mgrps"
|
dbg8 "numberp quantity,precision,mdecimalpoint,mgrps"
|
||||||
$ fromRawNumber suggestedStyle raw
|
$ fromRawNumber raw
|
||||||
mExp <- optional $ try $ exponentp
|
mExp <- optional $ try $ exponentp
|
||||||
case mExp of
|
case mExp of
|
||||||
Just expFunc
|
Just expFunc
|
||||||
@ -692,79 +693,26 @@ exponentp = do
|
|||||||
return $ bimap (* 10^^exp) (max 0 . subtract exp)
|
return $ bimap (* 10^^exp) (max 0 . subtract exp)
|
||||||
<?> "exponentp"
|
<?> "exponentp"
|
||||||
|
|
||||||
-- | Interpret a raw number as a decimal number, and identify the decimal
|
-- | Interpret a raw number as a decimal number.
|
||||||
-- point charcter and digit separating scheme. There is only one ambiguous
|
|
||||||
-- case: when there is just a single separator between two digit groups.
|
|
||||||
-- Disambiguate using an amount style, if provided; otherwise, assume that
|
|
||||||
-- the separator is a decimal point.
|
|
||||||
--
|
--
|
||||||
-- Returns:
|
-- Returns:
|
||||||
-- - the decimal number
|
-- - the decimal number
|
||||||
-- - the precision (number of digits after the decimal point)
|
-- - the precision (number of digits after the decimal point)
|
||||||
-- - the decimal point character, if any
|
-- - the decimal point character, if any
|
||||||
-- - the digit group style, if any (digit group character and sizes of digit groups)
|
-- - the digit group style, if any (digit group character and sizes of digit groups)
|
||||||
fromRawNumber
|
fromRawNumber :: RawNumber -> (Quantity, Int, Maybe Char, Maybe DigitGroupStyle)
|
||||||
:: Maybe AmountStyle
|
fromRawNumber raw = case raw of
|
||||||
-> RawNumber
|
|
||||||
-> (Quantity, Int, Maybe Char, Maybe DigitGroupStyle)
|
|
||||||
fromRawNumber suggestedStyle raw = case raw of
|
|
||||||
|
|
||||||
LeadingDecimalPt decPt digitGrp ->
|
NoSeparators digitGrp mDecimals ->
|
||||||
let quantity =
|
let decimalGrp = maybe mempty snd mDecimals
|
||||||
Decimal (fromIntegral precision) (digitGroupNumber digitGrp)
|
(quantity, precision) = toDecimal digitGrp decimalGrp
|
||||||
precision = digitGroupLength digitGrp
|
in (quantity, precision, fmap fst mDecimals, Nothing)
|
||||||
in (quantity, precision, Just decPt, Nothing)
|
|
||||||
|
|
||||||
TrailingDecimalPt digitGrp decPt ->
|
WithSeparators digitSep digitGrps mDecimals ->
|
||||||
let quantity =
|
let decimalGrp = maybe mempty snd mDecimals
|
||||||
Decimal (fromIntegral precision) (digitGroupNumber digitGrp)
|
(quantity, precision) = toDecimal (mconcat digitGrps) decimalGrp
|
||||||
precision = 0
|
digitGroupStyle = DigitGroups digitSep (groupSizes digitGrps)
|
||||||
in (quantity, precision, Just decPt, Nothing)
|
in (quantity, precision, fmap fst mDecimals, Just digitGroupStyle)
|
||||||
|
|
||||||
NoSeparators digitGrp ->
|
|
||||||
let quantity =
|
|
||||||
Decimal (fromIntegral precision) (digitGroupNumber digitGrp)
|
|
||||||
precision = 0
|
|
||||||
in (quantity, precision, Nothing, Nothing)
|
|
||||||
|
|
||||||
AmbiguousNumber digitGrp1 sep digitGrp2
|
|
||||||
-- If present, use the suggested style to disambiguate;
|
|
||||||
-- otherwise, assume that the separator is a decimal point where possible.
|
|
||||||
| isDecimalPointChar sep
|
|
||||||
&& maybe True (sep `isValidDecimalBy`) suggestedStyle ->
|
|
||||||
|
|
||||||
-- Assuming that the separator is a decimal point
|
|
||||||
let quantity =
|
|
||||||
Decimal (fromIntegral precision)
|
|
||||||
(digitGroupNumber $ digitGrp1 <> digitGrp2)
|
|
||||||
precision = digitGroupLength digitGrp2
|
|
||||||
in (quantity, precision, Just sep, Nothing)
|
|
||||||
|
|
||||||
| otherwise ->
|
|
||||||
-- Assuming that the separator is digit separator
|
|
||||||
let quantity =
|
|
||||||
Decimal (fromIntegral precision)
|
|
||||||
(digitGroupNumber $ digitGrp1 <> digitGrp2)
|
|
||||||
precision = 0
|
|
||||||
digitGroupStyle = Just $
|
|
||||||
DigitGroups sep (groupSizes $ [digitGrp1, digitGrp2])
|
|
||||||
in (quantity, precision, Nothing, digitGroupStyle)
|
|
||||||
|
|
||||||
DigitSeparators digitSep digitGrps ->
|
|
||||||
let quantity =
|
|
||||||
Decimal (fromIntegral precision)
|
|
||||||
(digitGroupNumber $ mconcat digitGrps)
|
|
||||||
precision = 0
|
|
||||||
digitGroupStyle = Just $ DigitGroups digitSep (groupSizes digitGrps)
|
|
||||||
in (quantity, precision, Nothing, digitGroupStyle)
|
|
||||||
|
|
||||||
BothSeparators digitSep digitGrps decPt decimalGrp ->
|
|
||||||
let quantity =
|
|
||||||
Decimal (fromIntegral precision)
|
|
||||||
(digitGroupNumber $ mconcat digitGrps <> decimalGrp)
|
|
||||||
precision = digitGroupLength decimalGrp
|
|
||||||
digitGroupStyle = Just $ DigitGroups digitSep (groupSizes digitGrps)
|
|
||||||
in (quantity, precision, Just decPt, digitGroupStyle)
|
|
||||||
|
|
||||||
where
|
where
|
||||||
-- Outputs digit group sizes from least significant to most significant
|
-- Outputs digit group sizes from least significant to most significant
|
||||||
@ -773,6 +721,23 @@ fromRawNumber suggestedStyle raw = case raw of
|
|||||||
(a:b:cs) | a < b -> b:cs
|
(a:b:cs) | a < b -> b:cs
|
||||||
gs -> gs
|
gs -> gs
|
||||||
|
|
||||||
|
toDecimal :: DigitGrp -> DigitGrp -> (Decimal, Int)
|
||||||
|
toDecimal preDecimalGrp postDecimalGrp = (quantity, precision)
|
||||||
|
where
|
||||||
|
quantity = Decimal (fromIntegral precision)
|
||||||
|
(digitGroupNumber $ preDecimalGrp <> postDecimalGrp)
|
||||||
|
precision = digitGroupLength postDecimalGrp
|
||||||
|
|
||||||
|
|
||||||
|
disambiguateNumber :: Maybe AmountStyle -> AmbiguousNumber -> RawNumber
|
||||||
|
disambiguateNumber suggestedStyle (AmbiguousNumber grp1 sep grp2) =
|
||||||
|
-- If present, use the suggested style to disambiguate;
|
||||||
|
-- otherwise, assume that the separator is a decimal point where possible.
|
||||||
|
if isDecimalPointChar sep &&
|
||||||
|
maybe True (sep `isValidDecimalBy`) suggestedStyle
|
||||||
|
then NoSeparators grp1 (Just (sep, grp2))
|
||||||
|
else WithSeparators sep [grp1, grp2] Nothing
|
||||||
|
where
|
||||||
isValidDecimalBy :: Char -> AmountStyle -> Bool
|
isValidDecimalBy :: Char -> AmountStyle -> Bool
|
||||||
isValidDecimalBy c = \case
|
isValidDecimalBy c = \case
|
||||||
AmountStyle{asdecimalpoint = Just d} -> d == c
|
AmountStyle{asdecimalpoint = Just d} -> d == c
|
||||||
@ -780,13 +745,12 @@ fromRawNumber suggestedStyle raw = case raw of
|
|||||||
AmountStyle{asprecision = 0} -> False
|
AmountStyle{asprecision = 0} -> False
|
||||||
_ -> True
|
_ -> True
|
||||||
|
|
||||||
|
-- | Parse and interpret the structure of a number without external hints.
|
||||||
-- | Parse and interpret the structure of a number as far as possible
|
-- Numbers are digit strings, possibly separated into digit groups by one
|
||||||
-- without external hints. Numbers are digit strings, possibly separated
|
-- of two types of separators. (1) Numbers may optionally have a decimal
|
||||||
-- into digit groups by one of two types of separators. (1) Numbers may
|
-- point, which may be either a period or comma. (2) Numbers may
|
||||||
-- optionally have a decimal point, which may be either a period or comma.
|
-- optionally contain digit group separators, which must all be either a
|
||||||
-- (2) Numbers may optionally contain digit group separators, which must
|
-- period, a comma, or a space.
|
||||||
-- all be either a period, a comma, or a space.
|
|
||||||
--
|
--
|
||||||
-- It is our task to deduce the identities of the decimal point and digit
|
-- It is our task to deduce the identities of the decimal point and digit
|
||||||
-- separator characters, based on the allowed syntax. For instance, we
|
-- separator characters, based on the allowed syntax. For instance, we
|
||||||
@ -794,54 +758,63 @@ fromRawNumber suggestedStyle raw = case raw of
|
|||||||
-- must succeed all digit group separators.
|
-- must succeed all digit group separators.
|
||||||
--
|
--
|
||||||
-- >>> parseTest rawnumberp "1,234,567.89"
|
-- >>> parseTest rawnumberp "1,234,567.89"
|
||||||
-- BothSeparators ',' ["1","234","567"] '.' "89"
|
-- Right (WithSeparators ',' ["1","234","567"] (Just ('.',"89")))
|
||||||
|
-- >>> parseTest rawnumberp "1,000"
|
||||||
|
-- Left (AmbiguousNumber "1" ',' "000")
|
||||||
-- >>> parseTest rawnumberp "1 000"
|
-- >>> parseTest rawnumberp "1 000"
|
||||||
-- AmbiguousNumber "1" ' ' "000"
|
-- Right (WithSeparators ' ' ["1","000"] Nothing)
|
||||||
--
|
--
|
||||||
rawnumberp :: TextParser m RawNumber
|
rawnumberp :: TextParser m (Either AmbiguousNumber RawNumber)
|
||||||
rawnumberp = label "rawnumberp" $ do
|
rawnumberp = label "rawnumberp" $ do
|
||||||
rawNumber <- leadingDecimalPt <|> leadingDigits
|
rawNumber <- fmap Right leadingDecimalPt <|> leadingDigits
|
||||||
|
|
||||||
-- Guard against mistyped numbers
|
-- Guard against mistyped numbers
|
||||||
notFollowedBy $ satisfy isDecimalPointChar <|> (char ' ' >> digitChar)
|
notFollowedBy $ satisfy isDecimalPointChar <|> char ' ' *> digitChar
|
||||||
|
|
||||||
return $ dbg8 "rawnumberp" rawNumber
|
return $ dbg8 "rawnumberp" rawNumber
|
||||||
|
|
||||||
where
|
where
|
||||||
|
|
||||||
leadingDecimalPt :: TextParser m RawNumber
|
leadingDecimalPt :: TextParser m RawNumber
|
||||||
leadingDecimalPt =
|
leadingDecimalPt = do
|
||||||
LeadingDecimalPt <$> satisfy isDecimalPointChar <*> digitgroupp
|
decPt <- satisfy isDecimalPointChar
|
||||||
|
decGrp <- digitgroupp
|
||||||
|
pure $ NoSeparators mempty (Just (decPt, decGrp))
|
||||||
|
|
||||||
leadingDigits :: TextParser m RawNumber
|
leadingDigits :: TextParser m (Either AmbiguousNumber RawNumber)
|
||||||
leadingDigits = do
|
leadingDigits = do
|
||||||
grp1 <- digitgroupp
|
grp1 <- digitgroupp
|
||||||
withSeparators grp1 <|> trailingDecimalPt grp1 <|> pure (NoSeparators grp1)
|
withSeparators grp1 <|> fmap Right (trailingDecimalPt grp1)
|
||||||
|
<|> pure (Right $ NoSeparators grp1 Nothing)
|
||||||
|
|
||||||
withSeparators :: DigitGrp -> TextParser m RawNumber
|
withSeparators :: DigitGrp -> TextParser m (Either AmbiguousNumber RawNumber)
|
||||||
withSeparators grp1 = do
|
withSeparators grp1 = do
|
||||||
(sep, grp2) <- try $ (,) <$> satisfy isDigitSeparatorChar <*> digitgroupp
|
(sep, grp2) <- try $ (,) <$> satisfy isDigitSeparatorChar <*> digitgroupp
|
||||||
grps <- many $ try $ char sep *> digitgroupp
|
grps <- many $ try $ char sep *> digitgroupp
|
||||||
|
|
||||||
let digitGroups = grp1 : grp2 : grps
|
let digitGroups = grp1 : grp2 : grps
|
||||||
withDecimalPt sep digitGroups <|> pure (withoutDecimalPt grp1 sep grp2 grps)
|
fmap Right (withDecimalPt sep digitGroups)
|
||||||
|
<|> pure (withoutDecimalPt grp1 sep grp2 grps)
|
||||||
|
|
||||||
withDecimalPt :: Char -> [DigitGrp] -> TextParser m RawNumber
|
withDecimalPt :: Char -> [DigitGrp] -> TextParser m RawNumber
|
||||||
withDecimalPt digitSep digitGroups = do
|
withDecimalPt digitSep digitGroups = do
|
||||||
decimalPt <- satisfy $ \c -> isDecimalPointChar c && c /= digitSep
|
decPt <- satisfy $ \c -> isDecimalPointChar c && c /= digitSep
|
||||||
decimalDigitGroup <- option mempty digitgroupp
|
decDigitGrp <- option mempty digitgroupp
|
||||||
|
|
||||||
pure $ BothSeparators digitSep digitGroups decimalPt decimalDigitGroup
|
pure $ WithSeparators digitSep digitGroups (Just (decPt, decDigitGrp))
|
||||||
|
|
||||||
withoutDecimalPt :: DigitGrp -> Char -> DigitGrp -> [DigitGrp] -> RawNumber
|
withoutDecimalPt
|
||||||
|
:: DigitGrp
|
||||||
|
-> Char
|
||||||
|
-> DigitGrp
|
||||||
|
-> [DigitGrp]
|
||||||
|
-> Either AmbiguousNumber RawNumber
|
||||||
withoutDecimalPt grp1 sep grp2 grps
|
withoutDecimalPt grp1 sep grp2 grps
|
||||||
| null grps = AmbiguousNumber grp1 sep grp2
|
| null grps && isDecimalPointChar sep =
|
||||||
| otherwise = DigitSeparators sep (grp1:grp2:grps)
|
Left $ AmbiguousNumber grp1 sep grp2
|
||||||
|
| otherwise = Right $ WithSeparators sep (grp1:grp2:grps) Nothing
|
||||||
|
|
||||||
trailingDecimalPt :: DigitGrp -> TextParser m RawNumber
|
trailingDecimalPt :: DigitGrp -> TextParser m RawNumber
|
||||||
trailingDecimalPt grp1 = do
|
trailingDecimalPt grp1 = do
|
||||||
decimalPt <- satisfy isDecimalPointChar
|
decPt <- satisfy isDecimalPointChar
|
||||||
pure $ TrailingDecimalPt grp1 decimalPt
|
pure $ NoSeparators grp1 (Just (decPt, mempty))
|
||||||
|
|
||||||
|
|
||||||
isDecimalPointChar :: Char -> Bool
|
isDecimalPointChar :: Char -> Bool
|
||||||
@ -879,12 +852,11 @@ digitgroupp = label "digit group"
|
|||||||
|
|
||||||
|
|
||||||
data RawNumber
|
data RawNumber
|
||||||
= LeadingDecimalPt Char DigitGrp -- .50
|
= NoSeparators DigitGrp (Maybe (Char, DigitGrp)) -- 100 or 100. or .100 or 100.50
|
||||||
| TrailingDecimalPt DigitGrp Char -- 100.
|
| WithSeparators Char [DigitGrp] (Maybe (Char, DigitGrp)) -- 1,000,000 or 1,000.50
|
||||||
| NoSeparators DigitGrp -- 100
|
deriving (Show, Eq)
|
||||||
| AmbiguousNumber DigitGrp Char DigitGrp -- 1,000
|
|
||||||
| DigitSeparators Char [DigitGrp] -- 1,000,000
|
data AmbiguousNumber = AmbiguousNumber DigitGrp Char DigitGrp -- 1,000
|
||||||
| BothSeparators Char [DigitGrp] Char DigitGrp -- 1,000.50
|
|
||||||
deriving (Show, Eq)
|
deriving (Show, Eq)
|
||||||
|
|
||||||
-- test_numberp = do
|
-- test_numberp = do
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user