1078 lines
		
	
	
		
			41 KiB
		
	
	
	
		
			Haskell
		
	
	
	
	
	
			
		
		
	
	
			1078 lines
		
	
	
		
			41 KiB
		
	
	
	
		
			Haskell
		
	
	
	
	
	
| --- * doc
 | ||
| -- Lines beginning "--- *" are collapsible orgstruct nodes. Emacs users,
 | ||
| -- (add-hook 'haskell-mode-hook
 | ||
| --   (lambda () (set-variable 'orgstruct-heading-prefix-regexp "--- " t))
 | ||
| --   'orgstruct-mode)
 | ||
| -- and press TAB on nodes to expand/collapse.
 | ||
| 
 | ||
| {-|
 | ||
| 
 | ||
| Some common parsers and helpers used by several readers.
 | ||
| Some of these might belong in Hledger.Read.JournalReader or Hledger.Read.
 | ||
| 
 | ||
| -}
 | ||
| 
 | ||
| --- * module
 | ||
| {-# LANGUAGE CPP, BangPatterns, DeriveDataTypeable, RecordWildCards, NamedFieldPuns, NoMonoLocalBinds, ScopedTypeVariables, FlexibleContexts, TupleSections, OverloadedStrings #-}
 | ||
| {-# LANGUAGE LambdaCase #-}
 | ||
| 
 | ||
| module Hledger.Read.Common (
 | ||
|   Reader (..),
 | ||
|   InputOpts (..),
 | ||
|   rawOptsToInputOpts,
 | ||
| 
 | ||
|   -- * parsing utilities
 | ||
|   runTextParser,
 | ||
|   runJournalParser,
 | ||
|   rjp,
 | ||
|   runErroringJournalParser,
 | ||
|   rejp,
 | ||
|   genericSourcePos,
 | ||
|   journalSourcePos,
 | ||
|   generateAutomaticPostings,
 | ||
|   parseAndFinaliseJournal,
 | ||
|   setYear,
 | ||
|   setDefaultCommodityAndStyle,
 | ||
|   getDefaultCommodityAndStyle,
 | ||
|   pushParentAccount,
 | ||
|   popParentAccount,
 | ||
|   getParentAccount,
 | ||
|   addAccountAlias,
 | ||
|   clearAccountAliases,
 | ||
|   journalAddFile,
 | ||
|   parserErrorAt,
 | ||
| 
 | ||
|   -- * parsers
 | ||
|   -- ** transaction bits
 | ||
|   statusp,
 | ||
|   codep,
 | ||
|   descriptionp,
 | ||
| 
 | ||
|   -- ** dates
 | ||
|   datep,
 | ||
|   datetimep,
 | ||
|   secondarydatep,
 | ||
| 
 | ||
|   -- ** account names
 | ||
|   modifiedaccountnamep,
 | ||
|   accountnamep,
 | ||
| 
 | ||
|   -- ** amounts
 | ||
|   spaceandamountormissingp,
 | ||
|   amountp,
 | ||
|   mamountp',
 | ||
|   commoditysymbolp,
 | ||
|   partialbalanceassertionp,
 | ||
|   fixedlotpricep,
 | ||
|   numberp,
 | ||
| 
 | ||
|   -- ** comments
 | ||
|   multilinecommentp,
 | ||
|   emptyorcommentlinep,
 | ||
|   followingcommentp,
 | ||
|   followingcommentandtagsp,
 | ||
| 
 | ||
|   -- ** tags
 | ||
|   commentTags,
 | ||
|   tagsp
 | ||
| )
 | ||
| where
 | ||
| --- * imports
 | ||
| import Prelude ()
 | ||
| import Prelude.Compat hiding (readFile)
 | ||
| import Control.Arrow ((***))
 | ||
| import Control.Monad.Compat
 | ||
| import Control.Monad.Except (ExceptT(..), runExceptT, throwError) --, catchError)
 | ||
| import Control.Monad.State.Strict
 | ||
| import Data.Char
 | ||
| import Data.Data
 | ||
| import Data.Default
 | ||
| import Data.Functor.Identity
 | ||
| import Data.List.Compat
 | ||
| import Data.List.NonEmpty (NonEmpty(..))
 | ||
| import Data.List.Split (wordsBy)
 | ||
| import Data.Maybe
 | ||
| import qualified Data.Map as M
 | ||
| #if !(MIN_VERSION_base(4,11,0))
 | ||
| import Data.Monoid
 | ||
| #endif
 | ||
| import Data.Text (Text)
 | ||
| import qualified Data.Text as T
 | ||
| import Data.Time.Calendar
 | ||
| import Data.Time.LocalTime
 | ||
| import Safe
 | ||
| import System.Time (getClockTime)
 | ||
| import Text.Megaparsec.Compat
 | ||
| 
 | ||
| import Hledger.Data
 | ||
| import Hledger.Utils
 | ||
| import qualified Hledger.Query as Q (Query(Any))
 | ||
| 
 | ||
| -- | A hledger journal reader is a triple of storage format name, a
 | ||
| -- detector of that format, and a parser from that format to Journal.
 | ||
| data Reader = Reader {
 | ||
| 
 | ||
|      -- The canonical name of the format handled by this reader
 | ||
|      rFormat   :: StorageFormat
 | ||
| 
 | ||
|      -- The file extensions recognised as containing this format
 | ||
|     ,rExtensions :: [String]
 | ||
| 
 | ||
|      -- A text parser for this format, accepting input options, file
 | ||
|      -- path for error messages and file contents, producing an exception-raising IO
 | ||
|      -- action that returns a journal or error message.
 | ||
|     ,rParser   :: InputOpts -> FilePath -> Text -> ExceptT String IO Journal
 | ||
| 
 | ||
|      -- Experimental readers are never tried automatically.
 | ||
|     ,rExperimental :: Bool
 | ||
|     }
 | ||
| 
 | ||
| instance Show Reader where show r = rFormat r ++ " reader"
 | ||
| 
 | ||
| -- $setup
 | ||
| 
 | ||
| -- | Various options to use when reading journal files.
 | ||
| -- Similar to CliOptions.inputflags, simplifies the journal-reading functions.
 | ||
| data InputOpts = InputOpts {
 | ||
|      -- files_             :: [FilePath]
 | ||
|      mformat_           :: Maybe StorageFormat  -- ^ a file/storage format to try, unless overridden
 | ||
|                                                 --   by a filename prefix. Nothing means try all.
 | ||
|     ,mrules_file_       :: Maybe FilePath       -- ^ a conversion rules file to use (when reading CSV)
 | ||
|     ,aliases_           :: [String]             -- ^ account name aliases to apply
 | ||
|     ,anon_              :: Bool                 -- ^ do light anonymisation/obfuscation of the data 
 | ||
|     ,ignore_assertions_ :: Bool                 -- ^ don't check balance assertions
 | ||
|     ,new_               :: Bool                 -- ^ read only new transactions since this file was last read
 | ||
|     ,new_save_          :: Bool                 -- ^ save latest new transactions state for next time
 | ||
|     ,pivot_             :: String               -- ^ use the given field's value as the account name 
 | ||
|     ,auto_              :: Bool                 -- ^ generate automatic postings when journal is parsed     
 | ||
|  } deriving (Show, Data) --, Typeable)
 | ||
| 
 | ||
| instance Default InputOpts where def = definputopts
 | ||
| 
 | ||
| definputopts :: InputOpts
 | ||
| definputopts = InputOpts def def def def def def True def def
 | ||
| 
 | ||
| rawOptsToInputOpts :: RawOpts -> InputOpts
 | ||
| rawOptsToInputOpts rawopts = InputOpts{
 | ||
|    -- files_             = map (T.unpack . stripquotes . T.pack) $ listofstringopt "file" rawopts
 | ||
|    mformat_           = Nothing
 | ||
|   ,mrules_file_       = maybestringopt "rules-file" rawopts
 | ||
|   ,aliases_           = map (T.unpack . stripquotes . T.pack) $ listofstringopt "alias" rawopts
 | ||
|   ,anon_              = boolopt "anon" rawopts
 | ||
|   ,ignore_assertions_ = boolopt "ignore-assertions" rawopts
 | ||
|   ,new_               = boolopt "new" rawopts
 | ||
|   ,new_save_          = True
 | ||
|   ,pivot_             = stringopt "pivot" rawopts
 | ||
|   ,auto_              = boolopt "auto" rawopts                        
 | ||
|   }
 | ||
| 
 | ||
| --- * parsing utilities
 | ||
| 
 | ||
| -- | Run a string parser with no state in the identity monad.
 | ||
| runTextParser, rtp :: TextParser Identity a -> Text -> Either (ParseError Char MPErr) a
 | ||
| runTextParser p t =  runParser p "" t
 | ||
| rtp = runTextParser
 | ||
| 
 | ||
| -- XXX odd, why doesn't this take a JournalParser ?
 | ||
| -- | Run a journal parser with a null journal-parsing state.
 | ||
| runJournalParser, rjp :: Monad m => TextParser m a -> Text -> m (Either (ParseError Char MPErr) a)
 | ||
| runJournalParser p t = runParserT p "" t
 | ||
| rjp = runJournalParser
 | ||
| 
 | ||
| -- | Run an error-raising journal parser with a null journal-parsing state.
 | ||
| runErroringJournalParser, rejp :: Monad m => ErroringJournalParser m a -> Text -> m (Either String a)
 | ||
| runErroringJournalParser p t =
 | ||
|   runExceptT $
 | ||
|   runJournalParser (evalStateT p mempty)
 | ||
|                    t >>=
 | ||
|   either (throwError . parseErrorPretty) return
 | ||
| rejp = runErroringJournalParser
 | ||
| 
 | ||
| genericSourcePos :: SourcePos -> GenericSourcePos
 | ||
| genericSourcePos p = GenericSourcePos (sourceName p) (fromIntegral . unPos $ sourceLine p) (fromIntegral . unPos $ sourceColumn p)
 | ||
| 
 | ||
| journalSourcePos :: SourcePos -> SourcePos -> GenericSourcePos
 | ||
| journalSourcePos p p' = JournalSourcePos (sourceName p) (fromIntegral . unPos $ sourceLine p, fromIntegral $ line')
 | ||
|     where line'
 | ||
|             | (unPos $ sourceColumn p') == 1 = unPos (sourceLine p') - 1
 | ||
|             | otherwise = unPos $ sourceLine p' -- might be at end of file withat last new-line
 | ||
| 
 | ||
| 
 | ||
| -- | Generate Automatic postings and add them to the current journal.
 | ||
| generateAutomaticPostings :: Journal -> Journal
 | ||
| generateAutomaticPostings j = j { jtxns = map modifier $ jtxns j }
 | ||
|   where
 | ||
|     modifier = foldr (flip (.) . runModifierTransaction') id mtxns
 | ||
|     runModifierTransaction' = fmap txnTieKnot . runModifierTransaction Q.Any
 | ||
|     mtxns = jmodifiertxns j
 | ||
| 
 | ||
| -- | Given a megaparsec ParsedJournal parser, input options, file
 | ||
| -- path and file content: parse and post-process a Journal, or give an error.
 | ||
| parseAndFinaliseJournal :: ErroringJournalParser IO ParsedJournal -> InputOpts
 | ||
|                            -> FilePath -> Text -> ExceptT String IO Journal
 | ||
| parseAndFinaliseJournal parser iopts f txt = do
 | ||
|   t <- liftIO getClockTime
 | ||
|   y <- liftIO getCurrentYear
 | ||
|   ep <- runParserT (evalStateT parser nulljournal {jparsedefaultyear=Just y}) f txt
 | ||
|   case ep of
 | ||
|     Right pj -> 
 | ||
|       let pj' = if auto_ iopts then generateAutomaticPostings pj else pj in
 | ||
|       case journalFinalise t f txt (not $ ignore_assertions_ iopts) pj' of
 | ||
|                         Right j -> return j
 | ||
|                         Left e  -> throwError e
 | ||
|     Left e   -> throwError $ parseErrorPretty e
 | ||
| 
 | ||
| parseAndFinaliseJournal' :: JournalParser Identity ParsedJournal -> InputOpts 
 | ||
|                             -> FilePath -> Text -> ExceptT String IO Journal
 | ||
| parseAndFinaliseJournal' parser iopts f txt = do
 | ||
|   t <- liftIO getClockTime
 | ||
|   y <- liftIO getCurrentYear
 | ||
|   let ep = runParser (evalStateT parser nulljournal {jparsedefaultyear=Just y}) f txt
 | ||
|   case ep of
 | ||
|     Right pj -> 
 | ||
|       let pj' = if auto_ iopts then generateAutomaticPostings pj else pj in      
 | ||
|       case journalFinalise t f txt (not $ ignore_assertions_ iopts) pj' of
 | ||
|                         Right j -> return j
 | ||
|                         Left e  -> throwError e
 | ||
|     Left e   -> throwError $ parseErrorPretty e
 | ||
| 
 | ||
| setYear :: Year -> JournalParser m ()
 | ||
| setYear y = modify' (\j -> j{jparsedefaultyear=Just y})
 | ||
| 
 | ||
| getYear :: JournalParser m (Maybe Year)
 | ||
| getYear = fmap jparsedefaultyear get
 | ||
| 
 | ||
| setDefaultCommodityAndStyle :: (CommoditySymbol,AmountStyle) -> JournalParser m ()
 | ||
| setDefaultCommodityAndStyle cs = modify' (\j -> j{jparsedefaultcommodity=Just cs})
 | ||
| 
 | ||
| getDefaultCommodityAndStyle :: JournalParser m (Maybe (CommoditySymbol,AmountStyle))
 | ||
| getDefaultCommodityAndStyle = jparsedefaultcommodity `fmap` get
 | ||
| 
 | ||
| -- | Get amount style associated with default currency.
 | ||
| --
 | ||
| -- Returns 'AmountStyle' used to defined by a latest default commodity directive
 | ||
| -- prior to current position within this file or its parents.
 | ||
| getDefaultAmountStyle :: JournalParser m (Maybe AmountStyle)
 | ||
| getDefaultAmountStyle = fmap snd <$> getDefaultCommodityAndStyle
 | ||
| 
 | ||
| -- | Lookup currency-specific amount style.
 | ||
| --
 | ||
| -- Returns 'AmountStyle' used in commodity directive within current journal
 | ||
| -- prior to current position or in its parents files.
 | ||
| getAmountStyle :: CommoditySymbol -> JournalParser m (Maybe AmountStyle)
 | ||
| getAmountStyle commodity = do
 | ||
|     specificStyle <-  maybe Nothing cformat . M.lookup commodity . jcommodities <$> get
 | ||
|     defaultStyle <- fmap snd <$> getDefaultCommodityAndStyle
 | ||
|     let effectiveStyle = listToMaybe $ catMaybes [specificStyle, defaultStyle]
 | ||
|     return effectiveStyle
 | ||
| 
 | ||
| pushAccount :: AccountName -> JournalParser m ()
 | ||
| pushAccount acct = modify' (\j -> j{jaccounts = (acct, Nothing) : jaccounts j})
 | ||
| 
 | ||
| pushParentAccount :: AccountName -> JournalParser m ()
 | ||
| pushParentAccount acct = modify' (\j -> j{jparseparentaccounts = acct : jparseparentaccounts j})
 | ||
| 
 | ||
| popParentAccount :: JournalParser m ()
 | ||
| popParentAccount = do
 | ||
|   j <- get
 | ||
|   case jparseparentaccounts j of
 | ||
|     []       -> unexpected (Tokens ('E' :| "nd of apply account block with no beginning"))
 | ||
|     (_:rest) -> put j{jparseparentaccounts=rest}
 | ||
| 
 | ||
| getParentAccount :: JournalParser m AccountName
 | ||
| getParentAccount = fmap (concatAccountNames . reverse . jparseparentaccounts) get
 | ||
| 
 | ||
| addAccountAlias :: MonadState Journal m => AccountAlias -> m ()
 | ||
| addAccountAlias a = modify' (\(j@Journal{..}) -> j{jparsealiases=a:jparsealiases})
 | ||
| 
 | ||
| getAccountAliases :: MonadState Journal m => m [AccountAlias]
 | ||
| getAccountAliases = fmap jparsealiases get
 | ||
| 
 | ||
| clearAccountAliases :: MonadState Journal m => m ()
 | ||
| clearAccountAliases = modify' (\(j@Journal{..}) -> j{jparsealiases=[]})
 | ||
| 
 | ||
| -- getTransactionCount :: MonadState Journal m =>  m Integer
 | ||
| -- getTransactionCount = fmap jparsetransactioncount get
 | ||
| --
 | ||
| -- setTransactionCount :: MonadState Journal m => Integer -> m ()
 | ||
| -- setTransactionCount i = modify' (\j -> j{jparsetransactioncount=i})
 | ||
| --
 | ||
| -- -- | Increment the transaction index by one and return the new value.
 | ||
| -- incrementTransactionCount :: MonadState Journal m => m Integer
 | ||
| -- incrementTransactionCount = do
 | ||
| --   modify' (\j -> j{jparsetransactioncount=jparsetransactioncount j + 1})
 | ||
| --   getTransactionCount
 | ||
| 
 | ||
| journalAddFile :: (FilePath,Text) -> Journal -> Journal
 | ||
| journalAddFile f j@Journal{jfiles=fs} = j{jfiles=fs++[f]}
 | ||
|   -- append, unlike the other fields, even though we do a final reverse,
 | ||
|   -- to compensate for additional reversal due to including/monoid-concatting
 | ||
| 
 | ||
| -- -- | Terminate parsing entirely, returning the given error message
 | ||
| -- -- with the current parse position prepended.
 | ||
| -- parserError :: String -> ErroringJournalParser a
 | ||
| -- parserError s = do
 | ||
| --   pos <- getPosition
 | ||
| --   parserErrorAt pos s
 | ||
| 
 | ||
| -- | Terminate parsing entirely, returning the given error message
 | ||
| -- with the given parse position prepended.
 | ||
| parserErrorAt :: Monad m => SourcePos -> String -> ErroringJournalParser m a
 | ||
| parserErrorAt pos s = throwError $ sourcePosPretty pos ++ ":\n" ++ s
 | ||
| 
 | ||
| --- * parsers
 | ||
| --- ** transaction bits
 | ||
| 
 | ||
| statusp :: TextParser m Status
 | ||
| statusp =
 | ||
|   choice'
 | ||
|     [ skipMany spacenonewline >> char '*' >> return Cleared
 | ||
|     , skipMany spacenonewline >> char '!' >> return Pending
 | ||
|     , return Unmarked
 | ||
|     ]
 | ||
|     <?> "cleared status"
 | ||
| 
 | ||
| codep :: TextParser m String
 | ||
| codep = try (do { skipSome spacenonewline; char '(' <?> "codep"; anyChar `manyTill` char ')' } ) <|> return ""
 | ||
| 
 | ||
| descriptionp :: JournalParser m String
 | ||
| descriptionp = many (noneOf (";\n" :: [Char]))
 | ||
| 
 | ||
| --- ** dates
 | ||
| 
 | ||
| -- | Parse a date in YYYY/MM/DD format.
 | ||
| -- Hyphen (-) and period (.) are also allowed as separators.
 | ||
| -- The year may be omitted if a default year has been set.
 | ||
| -- Leading zeroes may be omitted.
 | ||
| datep :: JournalParser m Day
 | ||
| datep = do
 | ||
|   -- hacky: try to ensure precise errors for invalid dates
 | ||
|   -- XXX reported error position is not too good
 | ||
|   -- pos <- genericSourcePos <$> getPosition
 | ||
|   datestr <- do
 | ||
|     c <- digitChar
 | ||
|     cs <- lift $ many $ choice' [digitChar, datesepchar]
 | ||
|     return $ c:cs
 | ||
|   let sepchars = nub $ sort $ filter (`elem` datesepchars) datestr
 | ||
|   when (length sepchars /= 1) $ fail $ "bad date, different separators used: " ++ datestr
 | ||
|   let dateparts = wordsBy (`elem` datesepchars) datestr
 | ||
|   currentyear <- getYear
 | ||
|   [y,m,d] <- case (dateparts,currentyear) of
 | ||
|               ([m,d],Just y)  -> return [show y,m,d]
 | ||
|               ([_,_],Nothing) -> fail $ "partial date "++datestr++" found, but the current year is unknown"
 | ||
|               ([y,m,d],_)     -> return [y,m,d]
 | ||
|               _               -> fail $ "bad date: " ++ datestr
 | ||
|   let maybedate = fromGregorianValid (read y) (read m) (read d)
 | ||
|   case maybedate of
 | ||
|     Nothing   -> fail $ "bad date: " ++ datestr
 | ||
|     Just date -> return date
 | ||
|   <?> "full or partial date"
 | ||
| 
 | ||
| -- | Parse a date and time in YYYY/MM/DD HH:MM[:SS][+-ZZZZ] format.
 | ||
| -- Hyphen (-) and period (.) are also allowed as date separators.
 | ||
| -- The year may be omitted if a default year has been set.
 | ||
| -- Seconds are optional.
 | ||
| -- The timezone is optional and ignored (the time is always interpreted as a local time).
 | ||
| -- Leading zeroes may be omitted (except in a timezone).
 | ||
| datetimep :: JournalParser m LocalTime
 | ||
| datetimep = do
 | ||
|   day <- datep
 | ||
|   lift $ skipSome spacenonewline
 | ||
|   h <- some digitChar
 | ||
|   let h' = read h
 | ||
|   guard $ h' >= 0 && h' <= 23
 | ||
|   char ':'
 | ||
|   m <- some digitChar
 | ||
|   let m' = read m
 | ||
|   guard $ m' >= 0 && m' <= 59
 | ||
|   s <- optional $ char ':' >> some digitChar
 | ||
|   let s' = case s of Just sstr -> read sstr
 | ||
|                      Nothing   -> 0
 | ||
|   guard $ s' >= 0 && s' <= 59
 | ||
|   {- tz <- -}
 | ||
|   optional $ do
 | ||
|                    plusminus <- oneOf ("-+" :: [Char])
 | ||
|                    d1 <- digitChar
 | ||
|                    d2 <- digitChar
 | ||
|                    d3 <- digitChar
 | ||
|                    d4 <- digitChar
 | ||
|                    return $ plusminus:d1:d2:d3:d4:""
 | ||
|   -- ltz <- liftIO $ getCurrentTimeZone
 | ||
|   -- let tz' = maybe ltz (fromMaybe ltz . parseTime defaultTimeLocale "%z") tz
 | ||
|   -- return $ localTimeToUTC tz' $ LocalTime day $ TimeOfDay h' m' (fromIntegral s')
 | ||
|   return $ LocalTime day $ TimeOfDay h' m' (fromIntegral s')
 | ||
| 
 | ||
| secondarydatep :: Day -> JournalParser m Day
 | ||
| secondarydatep primarydate = do
 | ||
|   char '='
 | ||
|   -- kludgy way to use primary date for default year
 | ||
|   let withDefaultYear d p = do
 | ||
|         y <- getYear
 | ||
|         let (y',_,_) = toGregorian d in setYear y'
 | ||
|         r <- p
 | ||
|         when (isJust y) $ setYear $ fromJust y -- XXX
 | ||
|         -- mapM setYear <$> y
 | ||
|         return r
 | ||
|   withDefaultYear primarydate datep
 | ||
| 
 | ||
| -- |
 | ||
| -- >> parsewith twoorthreepartdatestringp "2016/01/2"
 | ||
| -- Right "2016/01/2"
 | ||
| -- twoorthreepartdatestringp = do
 | ||
| --   n1 <- some digitChar
 | ||
| --   c <- datesepchar
 | ||
| --   n2 <- some digitChar
 | ||
| --   mn3 <- optional $ char c >> some digitChar
 | ||
| --   return $ n1 ++ c:n2 ++ maybe "" (c:) mn3
 | ||
| 
 | ||
| --- ** account names
 | ||
| 
 | ||
| -- | Parse an account name, then apply any parent account prefix and/or account aliases currently in effect.
 | ||
| modifiedaccountnamep :: JournalParser m AccountName
 | ||
| modifiedaccountnamep = do
 | ||
|   parent <- getParentAccount
 | ||
|   aliases <- getAccountAliases
 | ||
|   a <- lift accountnamep
 | ||
|   return $
 | ||
|     accountNameApplyAliases aliases $
 | ||
|      -- XXX accountNameApplyAliasesMemo ? doesn't seem to make a difference
 | ||
|     joinAccountNames parent
 | ||
|     a
 | ||
| 
 | ||
| -- | Parse an account name. Account names start with a non-space, may
 | ||
| -- have single spaces inside them, and are terminated by two or more
 | ||
| -- spaces (or end of input). Also they have one or more components of
 | ||
| -- at least one character, separated by the account separator char.
 | ||
| -- (This parser will also consume one following space, if present.)
 | ||
| accountnamep :: TextParser m AccountName
 | ||
| accountnamep = do
 | ||
|     astr <- do
 | ||
|       c <- nonspace
 | ||
|       cs <- striptrailingspace <$> many (nonspace <|> singlespace)
 | ||
|       return $ c:cs
 | ||
|     let a = T.pack astr
 | ||
|     when (accountNameFromComponents (accountNameComponents a) /= a)
 | ||
|          (fail $ "account name seems ill-formed: "++astr)
 | ||
|     return a
 | ||
|     where
 | ||
|       singlespace = try (do {spacenonewline; do {notFollowedBy spacenonewline; return ' '}})
 | ||
|       striptrailingspace "" = ""
 | ||
|       striptrailingspace s  = if last s == ' ' then init s else s
 | ||
| 
 | ||
| -- accountnamechar = notFollowedBy (oneOf "()[]") >> nonspace
 | ||
| --     <?> "account name character (non-bracket, non-parenthesis, non-whitespace)"
 | ||
| 
 | ||
| --- ** amounts
 | ||
| 
 | ||
| -- | Parse whitespace then an amount, with an optional left or right
 | ||
| -- currency symbol and optional price, or return the special
 | ||
| -- "missing" marker amount.
 | ||
| spaceandamountormissingp :: Monad m => JournalParser m MixedAmount
 | ||
| spaceandamountormissingp =
 | ||
|   try (do
 | ||
|         lift $ skipSome spacenonewline
 | ||
|         (Mixed . (:[])) `fmap` amountp <|> return missingmixedamt
 | ||
|       ) <|> return missingmixedamt
 | ||
| 
 | ||
| #ifdef TESTS
 | ||
| assertParseEqual' :: (Show a, Eq a) => (Either ParseError a) -> a -> Assertion
 | ||
| assertParseEqual' parse expected = either (assertFailure.show) (`is'` expected) parse
 | ||
| 
 | ||
| is' :: (Eq a, Show a) => a -> a -> Assertion
 | ||
| a `is'` e = assertEqual e a
 | ||
| 
 | ||
| test_spaceandamountormissingp = do
 | ||
|     assertParseEqual' (parseWithState mempty spaceandamountormissingp " $47.18") (Mixed [usd 47.18])
 | ||
|     assertParseEqual' (parseWithState mempty spaceandamountormissingp "$47.18") missingmixedamt
 | ||
|     assertParseEqual' (parseWithState mempty spaceandamountormissingp " ") missingmixedamt
 | ||
|     assertParseEqual' (parseWithState mempty spaceandamountormissingp "") missingmixedamt
 | ||
| #endif
 | ||
| 
 | ||
| -- | Parse a single-commodity amount, with optional symbol on the left or
 | ||
| -- right, optional unit or total price, and optional (ignored)
 | ||
| -- ledger-style balance assertion or fixed lot price declaration.
 | ||
| amountp :: Monad m => JournalParser m Amount
 | ||
| amountp = try leftsymbolamountp <|> try rightsymbolamountp <|> nosymbolamountp
 | ||
| 
 | ||
| #ifdef TESTS
 | ||
| test_amountp = do
 | ||
|     assertParseEqual' (parseWithState mempty amountp "$47.18") (usd 47.18)
 | ||
|     assertParseEqual' (parseWithState mempty amountp "$1.") (usd 1 `withPrecision` 0)
 | ||
|   -- ,"amount with unit price" ~: do
 | ||
|     assertParseEqual'
 | ||
|      (parseWithState mempty amountp "$10 @ €0.5")
 | ||
|      (usd 10 `withPrecision` 0 `at` (eur 0.5 `withPrecision` 1))
 | ||
|   -- ,"amount with total price" ~: do
 | ||
|     assertParseEqual'
 | ||
|      (parseWithState mempty amountp "$10 @@ €5")
 | ||
|      (usd 10 `withPrecision` 0 @@ (eur 5 `withPrecision` 0))
 | ||
| #endif
 | ||
| 
 | ||
| -- | Parse an amount from a string, or get an error.
 | ||
| amountp' :: String -> Amount
 | ||
| amountp' s =
 | ||
|   case runParser (evalStateT (amountp <* eof) mempty) "" (T.pack s) of
 | ||
|     Right amt -> amt
 | ||
|     Left err  -> error' $ show err -- XXX should throwError
 | ||
| 
 | ||
| -- | Parse a mixed amount from a string, or get an error.
 | ||
| mamountp' :: String -> MixedAmount
 | ||
| mamountp' = Mixed . (:[]) . amountp'
 | ||
| 
 | ||
| signp :: TextParser m String
 | ||
| signp = do
 | ||
|   sign <- optional $ oneOf ("+-" :: [Char])
 | ||
|   return $ case sign of Just '-' -> "-"
 | ||
|                         _        -> ""
 | ||
| 
 | ||
| multiplierp :: TextParser m Bool
 | ||
| multiplierp = do
 | ||
|   multiplier <- optional $ oneOf ("*" :: [Char])
 | ||
|   return $ case multiplier of Just '*' -> True
 | ||
|                               _        -> False
 | ||
| 
 | ||
| -- | This is like skipMany but it returns True if at least one element
 | ||
| -- was skipped. This is helpful if you’re just using many to check if
 | ||
| -- the resulting list is empty or not.
 | ||
| skipMany' :: MonadPlus m => m a -> m Bool
 | ||
| skipMany' p = go False
 | ||
|   where
 | ||
|     go !isNull = do
 | ||
|       more <- option False (True <$ p)
 | ||
|       if more
 | ||
|         then go True
 | ||
|         else pure isNull
 | ||
| 
 | ||
| leftsymbolamountp :: Monad m => JournalParser m Amount
 | ||
| leftsymbolamountp = do
 | ||
|   sign <- lift signp
 | ||
|   m <- lift multiplierp
 | ||
|   c <- lift commoditysymbolp
 | ||
|   suggestedStyle <- getAmountStyle c
 | ||
|   commodityspaced <- lift $ skipMany' spacenonewline
 | ||
|   (q,prec,mdec,mgrps) <- lift $ numberp suggestedStyle
 | ||
|   let s = amountstyle{ascommodityside=L, ascommodityspaced=commodityspaced, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps}
 | ||
|   p <- priceamountp
 | ||
|   let applysign = if sign=="-" then negate else id
 | ||
|   return $ applysign $ Amount c q p s m
 | ||
|   <?> "left-symbol amount"
 | ||
| 
 | ||
| rightsymbolamountp :: Monad m => JournalParser m Amount
 | ||
| rightsymbolamountp = do
 | ||
|   m <- lift multiplierp
 | ||
|   sign <- lift signp
 | ||
|   rawnum <- lift $ rawnumberp
 | ||
|   expMod <- lift . option id $ try exponentp
 | ||
|   commodityspaced <- lift $ skipMany' spacenonewline
 | ||
|   c <- lift commoditysymbolp
 | ||
|   suggestedStyle <- getAmountStyle c
 | ||
|   let (q0,prec0,mdec,mgrps) = fromRawNumber suggestedStyle (sign == "-") rawnum
 | ||
|       (q, prec) = expMod (q0, prec0)
 | ||
|   p <- priceamountp
 | ||
|   let s = amountstyle{ascommodityside=R, ascommodityspaced=commodityspaced, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps}
 | ||
|   return $ Amount c q p s m
 | ||
|   <?> "right-symbol amount"
 | ||
| 
 | ||
| nosymbolamountp :: Monad m => JournalParser m Amount
 | ||
| nosymbolamountp = do
 | ||
|   m <- lift multiplierp
 | ||
|   suggestedStyle <- getDefaultAmountStyle
 | ||
|   (q,prec,mdec,mgrps) <- lift $ numberp suggestedStyle
 | ||
|   p <- priceamountp
 | ||
|   -- apply the most recently seen default commodity and style to this commodityless amount
 | ||
|   defcs <- getDefaultCommodityAndStyle
 | ||
|   let (c,s) = case defcs of
 | ||
|         Just (defc,defs) -> (defc, defs{asprecision=max (asprecision defs) prec})
 | ||
|         Nothing          -> ("", amountstyle{asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps})
 | ||
|   return $ Amount c q p s m
 | ||
|   <?> "no-symbol amount"
 | ||
| 
 | ||
| commoditysymbolp :: TextParser m CommoditySymbol
 | ||
| commoditysymbolp = (quotedcommoditysymbolp <|> simplecommoditysymbolp) <?> "commodity symbol"
 | ||
| 
 | ||
| quotedcommoditysymbolp :: TextParser m CommoditySymbol
 | ||
| quotedcommoditysymbolp = do
 | ||
|   char '"'
 | ||
|   s <- some $ noneOf (";\n\"" :: [Char])
 | ||
|   char '"'
 | ||
|   return $ T.pack s
 | ||
| 
 | ||
| simplecommoditysymbolp :: TextParser m CommoditySymbol
 | ||
| simplecommoditysymbolp = T.pack <$> some (noneOf nonsimplecommoditychars)
 | ||
| 
 | ||
| priceamountp :: Monad m => JournalParser m Price
 | ||
| priceamountp =
 | ||
|     try (do
 | ||
|           lift (skipMany spacenonewline)
 | ||
|           char '@'
 | ||
|           try (do
 | ||
|                 char '@'
 | ||
|                 lift (skipMany spacenonewline)
 | ||
|                 a <- amountp -- XXX can parse more prices ad infinitum, shouldn't
 | ||
|                 return $ TotalPrice a)
 | ||
|            <|> (do
 | ||
|             lift (skipMany spacenonewline)
 | ||
|             a <- amountp -- XXX can parse more prices ad infinitum, shouldn't
 | ||
|             return $ UnitPrice a))
 | ||
|          <|> return NoPrice
 | ||
| 
 | ||
| partialbalanceassertionp :: Monad m => JournalParser m BalanceAssertion
 | ||
| partialbalanceassertionp =
 | ||
|     try (do
 | ||
|           lift (skipMany spacenonewline)
 | ||
|           sourcepos <- genericSourcePos <$> lift getPosition
 | ||
|           char '='
 | ||
|           lift (skipMany spacenonewline)
 | ||
|           a <- amountp -- XXX should restrict to a simple amount
 | ||
|           return $ Just (a, sourcepos))
 | ||
|          <|> return Nothing
 | ||
| 
 | ||
| -- balanceassertion :: Monad m => TextParser m (Maybe MixedAmount)
 | ||
| -- balanceassertion =
 | ||
| --     try (do
 | ||
| --           lift (skipMany spacenonewline)
 | ||
| --           string "=="
 | ||
| --           lift (skipMany spacenonewline)
 | ||
| --           a <- amountp -- XXX should restrict to a simple amount
 | ||
| --           return $ Just $ Mixed [a])
 | ||
| --          <|> return Nothing
 | ||
| 
 | ||
| -- http://ledger-cli.org/3.0/doc/ledger3.html#Fixing-Lot-Prices
 | ||
| fixedlotpricep :: Monad m => JournalParser m (Maybe Amount)
 | ||
| fixedlotpricep =
 | ||
|     try (do
 | ||
|           lift (skipMany spacenonewline)
 | ||
|           char '{'
 | ||
|           lift (skipMany spacenonewline)
 | ||
|           char '='
 | ||
|           lift (skipMany spacenonewline)
 | ||
|           a <- amountp -- XXX should restrict to a simple amount
 | ||
|           lift (skipMany spacenonewline)
 | ||
|           char '}'
 | ||
|           return $ Just a)
 | ||
|          <|> return Nothing
 | ||
| 
 | ||
| -- | Parse a string representation of a number for its value and display
 | ||
| -- attributes.
 | ||
| --
 | ||
| -- Some international number formats are accepted, eg either period or comma
 | ||
| -- may be used for the decimal point, and the other of these may be used for
 | ||
| -- separating digit groups in the integer part. See
 | ||
| -- http://en.wikipedia.org/wiki/Decimal_separator for more examples.
 | ||
| --
 | ||
| -- This returns: the parsed numeric value, the precision (number of digits
 | ||
| -- seen following the decimal point), the decimal point character used if any,
 | ||
| -- and the digit group style if any.
 | ||
| --
 | ||
| numberp :: Maybe AmountStyle -> TextParser m (Quantity, Int, Maybe Char, Maybe DigitGroupStyle)
 | ||
| numberp suggestedStyle = do
 | ||
|     -- a number is an optional sign followed by a sequence of digits possibly
 | ||
|     -- interspersed with periods, commas, or both
 | ||
|     -- ptrace "numberp"
 | ||
|     sign <- signp
 | ||
|     raw <- rawnumberp
 | ||
|     dbg8 "numberp suggestedStyle" suggestedStyle `seq` return ()
 | ||
|     let num@(q, prec, decSep, groups) = dbg8 "numberp quantity,precision,mdecimalpoint,mgrps" (fromRawNumber suggestedStyle (sign == "-") raw)
 | ||
|     option num . try $ do
 | ||
|         when (isJust groups) $ fail "groups and exponent are not mixable"
 | ||
|         (q', prec') <- exponentp <*> pure (q, prec)
 | ||
|         return (q', prec', decSep, groups)
 | ||
|     <?> "numberp"
 | ||
| 
 | ||
| exponentp :: TextParser m ((Quantity, Int) -> (Quantity, Int))
 | ||
| exponentp = do
 | ||
|     char' 'e'
 | ||
|     exp <- liftM read $ (++) <$> signp <*> some digitChar
 | ||
|     return $ (* 10^^exp) *** (0 `max`) . (+ (-exp))
 | ||
|     <?> "exponentp"
 | ||
| 
 | ||
| -- | Interpret the raw parts of a number, using the provided amount style if any,
 | ||
| -- determining the decimal point character and digit groups where possible.
 | ||
| -- Returns:
 | ||
| -- - the decimal number
 | ||
| -- - the precision (number of digits after the decimal point)  
 | ||
| -- - the decimal point character, if any
 | ||
| -- - the digit group style, if any (digit group character and sizes of digit groups)
 | ||
| fromRawNumber :: Maybe AmountStyle -> Bool -> (Maybe Char, [String], Maybe (Char, String)) -> (Quantity, Int, Maybe Char, Maybe DigitGroupStyle)
 | ||
| fromRawNumber suggestedStyle negated raw = (quantity, precision, mdecimalpoint, mgrps) where
 | ||
|     -- unpack with a hint if useful
 | ||
|     (mseparator, intparts, mdecimalpoint, frac) =
 | ||
|             case raw of
 | ||
|                 -- just a single punctuation between two digits groups, assume it's a decimal point
 | ||
|                 (Just s, [firstGroup, lastGroup], Nothing)
 | ||
|                     -- if have a decimalHint restrict this assumpion only to a matching separator
 | ||
|                     | maybe True (`asdecimalcheck` s) suggestedStyle -> (Nothing, [firstGroup], Just s, lastGroup)
 | ||
| 
 | ||
|                 (firstSep, digitGroups, Nothing) -> (firstSep, digitGroups, Nothing, [])
 | ||
|                 (firstSep, digitGroups, Just (d, frac)) -> (firstSep, digitGroups, Just d, frac)
 | ||
| 
 | ||
|     -- get the digit group sizes and digit group style if any
 | ||
|     groupsizes = reverse $ case map length intparts of
 | ||
|                                (a:b:cs) | a < b -> b:cs
 | ||
|                                gs               -> gs
 | ||
|     mgrps = (`DigitGroups` groupsizes) <$> mseparator
 | ||
| 
 | ||
|     -- put the parts back together without digit group separators, get the precision and parse the value
 | ||
|     repr = (if negated then "-" else "") ++ "0" ++ concat intparts ++ (if null frac then "" else "." ++ frac)
 | ||
|     quantity = read repr
 | ||
|     precision = length frac
 | ||
| 
 | ||
|     asdecimalcheck :: AmountStyle -> Char -> Bool
 | ||
|     asdecimalcheck = \case
 | ||
|         AmountStyle{asdecimalpoint = Just d} -> (d ==)
 | ||
|         AmountStyle{asdigitgroups = Just (DigitGroups g _)} -> (g /=)
 | ||
|         AmountStyle{asprecision = 0} -> const False
 | ||
|         _ -> const True
 | ||
| 
 | ||
| -- | Pre-parse a number into parts for further interpretation.
 | ||
| -- Numbers may optionally have a period/comma decimal point 
 | ||
| -- and/or comma/period/space digit group separators, but we don't
 | ||
| -- decide which is which here, just return the parts:
 | ||
| -- 
 | ||
| -- - the first separator char (period or comma or space) seen, if any
 | ||
| -- 
 | ||
| -- - the digit group(s), possibly several separated by the above char, occuring before..
 | ||
| -- 
 | ||
| -- - the second and last separator char, and following digit group, if any.
 | ||
| -- 
 | ||
| -- >>> 1,234,567.89
 | ||
| -- ( Just ',', ["1","234","567"], Just ('.', "89") )
 | ||
| -- >>> 1 000
 | ||
| -- ( Just ' ', ["1","000"], Nothing )
 | ||
| rawnumberp :: TextParser m ( Maybe Char , [String] , Maybe (Char, String) )
 | ||
| rawnumberp = do
 | ||
|     let sepChars = ['.', ','] -- all allowed punctuation characters
 | ||
| 
 | ||
|     (firstSep, groups) <- option (Nothing, []) $ do
 | ||
|         leadingDigits <- some digitChar
 | ||
|         option (Nothing, [leadingDigits]) . try $ do
 | ||
|             firstSep <- oneOf sepChars <|> whitespaceChar
 | ||
|             groups <- some digitChar `sepBy1` char firstSep
 | ||
|             return (Just firstSep, leadingDigits : groups)
 | ||
| 
 | ||
|     let remSepChars = maybe sepChars (`delete` sepChars) firstSep
 | ||
|         modifier
 | ||
|             | null groups = fmap Just  -- if no digits so far, we require at least some decimals
 | ||
|             | otherwise = optional
 | ||
| 
 | ||
|     extraGroup <- modifier $ do
 | ||
|         lastSep <- oneOf remSepChars
 | ||
|         digits <- modifier $ some digitChar  -- decimal separator allowed to be without digits if had some before
 | ||
|         return (lastSep, fromMaybe [] digits)
 | ||
| 
 | ||
|     -- make sure we didn't leading part of mistyped number
 | ||
|     notFollowedBy $ oneOf sepChars <|> (whitespaceChar >> digitChar)
 | ||
| 
 | ||
|     return $ dbg8 "rawnumberp" (firstSep, groups, extraGroup)
 | ||
|     <?> "rawnumberp"
 | ||
| 
 | ||
| -- | Parse a unicode char that represents any non-control space char (Zs general category).
 | ||
| whitespaceChar :: TextParser m Char
 | ||
| whitespaceChar = charCategory Space
 | ||
| 
 | ||
| -- test_numberp = do
 | ||
| --       let s `is` n = assertParseEqual (parseWithState mempty numberp s) n
 | ||
| --           assertFails = assertBool . isLeft . parseWithState mempty numberp
 | ||
| --       assertFails ""
 | ||
| --       "0"          `is` (0, 0, '.', ',', [])
 | ||
| --       "1"          `is` (1, 0, '.', ',', [])
 | ||
| --       "1.1"        `is` (1.1, 1, '.', ',', [])
 | ||
| --       "1,000.1"    `is` (1000.1, 1, '.', ',', [3])
 | ||
| --       "1.00.000,1" `is` (100000.1, 1, ',', '.', [3,2])
 | ||
| --       "1,000,000"  `is` (1000000, 0, '.', ',', [3,3])
 | ||
| --       "1."         `is` (1,   0, '.', ',', [])
 | ||
| --       "1,"         `is` (1,   0, ',', '.', [])
 | ||
| --       ".1"         `is` (0.1, 1, '.', ',', [])
 | ||
| --       ",1"         `is` (0.1, 1, ',', '.', [])
 | ||
| --       assertFails "1,000.000,1"
 | ||
| --       assertFails "1.000,000.1"
 | ||
| --       assertFails "1,000.000.1"
 | ||
| --       assertFails "1,,1"
 | ||
| --       assertFails "1..1"
 | ||
| --       assertFails ".1,"
 | ||
| --       assertFails ",1."
 | ||
| 
 | ||
| --- ** comments
 | ||
| 
 | ||
| multilinecommentp :: JournalParser m ()
 | ||
| multilinecommentp = startComment *> anyLine `skipManyTill` endComment
 | ||
|   where
 | ||
|     emptylinep = lift (skipMany spacenonewline) *> newline *> pure ()
 | ||
|     startComment = string "comment" >> emptylinep
 | ||
|     endComment = eof <|> (string "end comment" >> emptylinep)
 | ||
|     anyLine = anyChar `manyTill` newline
 | ||
| 
 | ||
| emptyorcommentlinep :: JournalParser m ()
 | ||
| emptyorcommentlinep = do
 | ||
|   lift (skipMany spacenonewline)
 | ||
|   void linecommentp <|> void newline
 | ||
| 
 | ||
| -- | Parse a possibly multi-line comment following a semicolon.
 | ||
| followingcommentp :: JournalParser m Text
 | ||
| followingcommentp = do
 | ||
|   -- ptrace "followingcommentp"
 | ||
|   lift (skipMany spacenonewline)
 | ||
|   samelinecomment <- try commentp <|> (newline >> return "")
 | ||
|   newlinecomments <- many $ try $ lift (skipSome spacenonewline) >> commentp
 | ||
|   return $ T.unlines $ samelinecomment:newlinecomments
 | ||
| 
 | ||
| -- | Parse a possibly multi-line comment following a semicolon, and
 | ||
| -- any tags and/or posting dates within it. Posting dates can be
 | ||
| -- expressed with "date"/"date2" tags and/or bracketed dates.  The
 | ||
| -- dates are parsed in full here so that errors are reported in the
 | ||
| -- right position. Missing years can be inferred if a default date is
 | ||
| -- provided.
 | ||
| --
 | ||
| -- >>> rejp (followingcommentandtagsp (Just $ fromGregorian 2000 1 2)) "; a:b, date:3/4, [=5/6]"
 | ||
| -- Right ("a:b, date:3/4, [=5/6]\n",[("a","b"),("date","3/4")],Just 2000-03-04,Just 2000-05-06)
 | ||
| --
 | ||
| -- Year unspecified and no default provided -> unknown year error, at correct position:
 | ||
| -- >>> rejp (followingcommentandtagsp Nothing) "  ;    xxx   date:3/4\n  ; second line"
 | ||
| -- Left ...1:22...partial date 3/4 found, but the current year is unknown...
 | ||
| --
 | ||
| -- Date tag value contains trailing text - forgot the comma, confused:
 | ||
| -- the syntaxes ?  We'll accept the leading date anyway
 | ||
| -- >>> rejp (followingcommentandtagsp (Just $ fromGregorian 2000 1 2)) "; date:3/4=5/6"
 | ||
| -- Right ("date:3/4=5/6\n",[("date","3/4=5/6")],Just 2000-03-04,Nothing)
 | ||
| --
 | ||
| followingcommentandtagsp :: MonadIO m => Maybe Day
 | ||
|                          -> ErroringJournalParser m (Text, [Tag], Maybe Day, Maybe Day)
 | ||
| followingcommentandtagsp mdefdate = do
 | ||
|   -- pdbg 0 "followingcommentandtagsp"
 | ||
| 
 | ||
|   -- Parse a single or multi-line comment, starting on this line or the next one.
 | ||
|   -- Save the starting position and preserve all whitespace for the subsequent re-parsing,
 | ||
|   -- to get good error positions.
 | ||
|   startpos <- getPosition
 | ||
|   commentandwhitespace :: String <- do
 | ||
|     let commentp' = (:) <$> char ';' <*> anyChar `manyTill` eolof
 | ||
|     sp1 <- lift (many spacenonewline)
 | ||
|     l1  <- try (lift commentp') <|> (newline >> return "")
 | ||
|     ls  <- lift . many $ try ((++) <$> some spacenonewline <*> commentp')
 | ||
|     return $ unlines $ (sp1 ++ l1) : ls
 | ||
|   let comment = T.pack $ unlines $ map (lstrip . dropWhile (==';') . strip) $ lines commentandwhitespace
 | ||
|   -- pdbg 0 $ "commentws:"++show commentandwhitespace
 | ||
|   -- pdbg 0 $ "comment:"++show comment
 | ||
| 
 | ||
|   -- Reparse the comment for any tags.
 | ||
|   tags <- case runTextParser (setPosition startpos >> tagsp) $ T.pack commentandwhitespace of
 | ||
|             Right ts -> return ts
 | ||
|             Left e   -> throwError $ parseErrorPretty e
 | ||
|   -- pdbg 0 $ "tags: "++show tags
 | ||
| 
 | ||
|   -- Reparse the comment for any posting dates. Use the transaction date for defaults, if provided.
 | ||
|   epdates <- liftIO $ rejp (setPosition startpos >> postingdatesp mdefdate) $ T.pack commentandwhitespace
 | ||
|   pdates <- case epdates of
 | ||
|               Right ds -> return ds
 | ||
|               Left e   -> throwError e
 | ||
|   -- pdbg 0 $ "pdates: "++show pdates
 | ||
|   let mdate  = headMay $ map snd $ filter ((=="date").fst)  pdates
 | ||
|       mdate2 = headMay $ map snd $ filter ((=="date2").fst) pdates
 | ||
| 
 | ||
|   return (comment, tags, mdate, mdate2)
 | ||
| 
 | ||
| -- A transaction/posting comment must start with a semicolon.
 | ||
| -- This parser ignores leading whitespace.
 | ||
| commentp :: JournalParser m Text
 | ||
| commentp = commentStartingWithp ";"
 | ||
| 
 | ||
| -- A line (file-level) comment can start with a semicolon, hash,
 | ||
| -- or star (allowing org nodes). This parser ignores leading whitespace.
 | ||
| linecommentp :: JournalParser m Text
 | ||
| linecommentp = commentStartingWithp ";#*" 
 | ||
| 
 | ||
| commentStartingWithp :: [Char] -> JournalParser m Text
 | ||
| commentStartingWithp cs = do
 | ||
|   -- ptrace "commentStartingWith"
 | ||
|   oneOf cs
 | ||
|   lift (skipMany spacenonewline)
 | ||
|   l <- anyChar `manyTill` (lift eolof)
 | ||
|   optional newline
 | ||
|   return $ T.pack l
 | ||
| 
 | ||
| --- ** tags
 | ||
| 
 | ||
| -- | Extract any tags (name:value ended by comma or newline) embedded in a string.
 | ||
| --
 | ||
| -- >>> commentTags "a b:, c:c d:d, e"
 | ||
| -- [("b",""),("c","c d:d")]
 | ||
| --
 | ||
| -- >>> commentTags "a [1/1/1] [1/1] [1], [=1/1/1] [=1/1] [=1] [1/1=1/1/1] [1=1/1/1] b:c"
 | ||
| -- [("b","c")]
 | ||
| --
 | ||
| -- --[("date","1/1/1"),("date","1/1"),("date2","1/1/1"),("date2","1/1"),("date","1/1"),("date2","1/1/1"),("date","1"),("date2","1/1/1")]
 | ||
| --
 | ||
| -- >>> commentTags "\na b:, \nd:e, f"
 | ||
| -- [("b",""),("d","e")]
 | ||
| --
 | ||
| commentTags :: Text -> [Tag]
 | ||
| commentTags s =
 | ||
|   case runTextParser tagsp s of
 | ||
|     Right r -> r
 | ||
|     Left _  -> [] -- shouldn't happen
 | ||
| 
 | ||
| -- | Parse all tags found in a string.
 | ||
| tagsp :: SimpleTextParser [Tag]
 | ||
| tagsp = -- do
 | ||
|   -- pdbg 0 $ "tagsp"
 | ||
|   many (try (nontagp >> tagp))
 | ||
| 
 | ||
| -- | Parse everything up till the first tag.
 | ||
| --
 | ||
| -- >>> rtp nontagp "\na b:, \nd:e, f"
 | ||
| -- Right "\na "
 | ||
| nontagp :: SimpleTextParser String
 | ||
| nontagp = -- do
 | ||
|   -- pdbg 0 "nontagp"
 | ||
|   -- anyChar `manyTill` (lookAhead (try (tagorbracketeddatetagsp Nothing >> return ()) <|> eof))
 | ||
|   anyChar `manyTill` lookAhead (try (void tagp) <|> eof)
 | ||
|   -- XXX costly ?
 | ||
| 
 | ||
| -- | Tags begin with a colon-suffixed tag name (a word beginning with
 | ||
| -- a letter) and are followed by a tag value (any text up to a comma
 | ||
| -- or newline, whitespace-stripped).
 | ||
| --
 | ||
| -- >>> rtp tagp "a:b b , c AuxDate: 4/2"
 | ||
| -- Right ("a","b b")
 | ||
| --
 | ||
| tagp :: SimpleTextParser Tag
 | ||
| tagp = do
 | ||
|   -- pdbg 0 "tagp"
 | ||
|   n <- tagnamep
 | ||
|   v <- tagvaluep
 | ||
|   return (n,v)
 | ||
| 
 | ||
| -- |
 | ||
| -- >>> rtp tagnamep "a:"
 | ||
| -- Right "a"
 | ||
| tagnamep :: SimpleTextParser Text
 | ||
| tagnamep = -- do
 | ||
|   -- pdbg 0 "tagnamep"
 | ||
|   T.pack <$> some (noneOf (": \t\n" :: [Char])) <* char ':'
 | ||
| 
 | ||
| tagvaluep :: TextParser m Text
 | ||
| tagvaluep = do
 | ||
|   -- ptrace "tagvalue"
 | ||
|   v <- anyChar `manyTill` (void (try (char ',')) <|> eolof)
 | ||
|   return $ T.pack $ strip $ reverse $ dropWhile (==',') $ reverse $ strip v
 | ||
| 
 | ||
| --- ** posting dates
 | ||
| 
 | ||
| -- | Parse all posting dates found in a string. Posting dates can be
 | ||
| -- expressed with date/date2 tags and/or bracketed dates.  The dates
 | ||
| -- are parsed fully to give useful errors. Missing years can be
 | ||
| -- inferred only if a default date is provided.
 | ||
| --
 | ||
| postingdatesp :: Monad m => Maybe Day -> ErroringJournalParser m [(TagName,Day)]
 | ||
| postingdatesp mdefdate = do
 | ||
|   -- pdbg 0 $ "postingdatesp"
 | ||
|   let p = ((:[]) <$> datetagp mdefdate) <|> bracketeddatetagsp mdefdate
 | ||
|       nonp =
 | ||
|          many (notFollowedBy p >> anyChar)
 | ||
|          -- anyChar `manyTill` (lookAhead (try (p >> return ()) <|> eof))
 | ||
|   concat <$> many (try (nonp >> p))
 | ||
| 
 | ||
| --- ** date tags
 | ||
| 
 | ||
| -- | Date tags are tags with name "date" or "date2". Their value is
 | ||
| -- parsed as a date, using the provided default date if any for
 | ||
| -- inferring a missing year if needed. Any error in date parsing is
 | ||
| -- reported and terminates parsing.
 | ||
| --
 | ||
| -- >>> rejp (datetagp Nothing) "date: 2000/1/2 "
 | ||
| -- Right ("date",2000-01-02)
 | ||
| --
 | ||
| -- >>> rejp (datetagp (Just $ fromGregorian 2001 2 3)) "date2:3/4"
 | ||
| -- Right ("date2",2001-03-04)
 | ||
| --
 | ||
| -- >>> rejp (datetagp Nothing) "date:  3/4"
 | ||
| -- Left ...1:9...partial date 3/4 found, but the current year is unknown...
 | ||
| --
 | ||
| datetagp :: Monad m => Maybe Day -> ErroringJournalParser m (TagName,Day)
 | ||
| datetagp mdefdate = do
 | ||
|   -- pdbg 0 "datetagp"
 | ||
|   string "date"
 | ||
|   n <- fromMaybe "" <$> optional (mptext "2")
 | ||
|   char ':'
 | ||
|   startpos <- getPosition
 | ||
|   v <- lift tagvaluep
 | ||
|   -- re-parse value as a date.
 | ||
|   j <- get
 | ||
|   let ep :: Either (ParseError Char MPErr) Day
 | ||
|       ep = parseWithState'
 | ||
|              j{jparsedefaultyear=first3.toGregorian <$> mdefdate}
 | ||
|              -- The value extends to a comma, newline, or end of file.
 | ||
|              -- It seems like ignoring any extra stuff following a date
 | ||
|              -- gives better errors here.
 | ||
|              (do
 | ||
|                  setPosition startpos
 | ||
|                  datep) -- <* eof)
 | ||
|              v
 | ||
|   case ep
 | ||
|     of Left e  -> throwError $ parseErrorPretty e
 | ||
|        Right d -> return ("date"<>n, d)
 | ||
| 
 | ||
| --- ** bracketed dates
 | ||
| 
 | ||
| -- tagorbracketeddatetagsp :: Monad m => Maybe Day -> TextParser u m [Tag]
 | ||
| -- tagorbracketeddatetagsp mdefdate =
 | ||
| --   bracketeddatetagsp mdefdate <|> ((:[]) <$> tagp)
 | ||
| 
 | ||
| -- | Parse Ledger-style bracketed posting dates ([DATE=DATE2]), as
 | ||
| -- "date" and/or "date2" tags. Anything that looks like an attempt at
 | ||
| -- this (a square-bracketed sequence of 0123456789/-.= containing at
 | ||
| -- least one digit and one date separator) is also parsed, and will
 | ||
| -- throw an appropriate error.
 | ||
| --
 | ||
| -- The dates are parsed in full here so that errors are reported in
 | ||
| -- the right position. A missing year in DATE can be inferred if a
 | ||
| -- default date is provided. A missing year in DATE2 will be inferred
 | ||
| -- from DATE.
 | ||
| --
 | ||
| -- >>> rejp (bracketeddatetagsp Nothing) "[2016/1/2=3/4]"
 | ||
| -- Right [("date",2016-01-02),("date2",2016-03-04)]
 | ||
| --
 | ||
| -- >>> rejp (bracketeddatetagsp Nothing) "[1]"
 | ||
| -- Left ...not a bracketed date...
 | ||
| --
 | ||
| -- >>> rejp (bracketeddatetagsp Nothing) "[2016/1/32]"
 | ||
| -- Left ...1:11:...bad date: 2016/1/32...
 | ||
| --
 | ||
| -- >>> rejp (bracketeddatetagsp Nothing) "[1/31]"
 | ||
| -- Left ...1:6:...partial date 1/31 found, but the current year is unknown...
 | ||
| --
 | ||
| -- >>> rejp (bracketeddatetagsp Nothing) "[0123456789/-.=/-.=]"
 | ||
| -- Left ...1:15:...bad date, different separators...
 | ||
| --
 | ||
| bracketeddatetagsp :: Monad m => Maybe Day -> ErroringJournalParser m [(TagName, Day)]
 | ||
| bracketeddatetagsp mdefdate = do
 | ||
|   -- pdbg 0 "bracketeddatetagsp"
 | ||
|   char '['
 | ||
|   startpos <- getPosition
 | ||
|   let digits = "0123456789"
 | ||
|   s <- some (oneOf $ '=':digits++datesepchars)
 | ||
|   char ']'
 | ||
|   unless (any (`elem` s) digits && any (`elem` datesepchars) s) $
 | ||
|     fail "not a bracketed date"
 | ||
| 
 | ||
|   -- looks sufficiently like a bracketed date, now we
 | ||
|   -- re-parse as dates and throw any errors
 | ||
|   j <- get
 | ||
|   let ep :: Either (ParseError Char MPErr) (Maybe Day, Maybe Day)
 | ||
|       ep = parseWithState'
 | ||
|              j{jparsedefaultyear=first3.toGregorian <$> mdefdate}
 | ||
|              (do
 | ||
|                setPosition startpos
 | ||
|                md1 <- optional datep
 | ||
|                maybe (return ()) (setYear.first3.toGregorian) md1
 | ||
|                md2 <- optional $ char '=' >> datep
 | ||
|                eof
 | ||
|                return (md1,md2)
 | ||
|              )
 | ||
|              (T.pack s)
 | ||
|   case ep
 | ||
|     of Left e          -> throwError $ parseErrorPretty e
 | ||
|        Right (md1,md2) -> return $ catMaybes
 | ||
|          [("date",) <$> md1, ("date2",) <$> md2]
 | ||
| 
 |