lib: Hledger.Read.CsvReader cleanup
Exports added: CsvRecord, CsvValue, csvFileFor Exports removed: expandIncludes, parseAndValidateCsvRules, transactionFromCsvRecord
This commit is contained in:
parent
5365fde3bc
commit
2c8a6e988f
@ -1,3 +1,6 @@
|
||||
-- * -*- eval: (orgstruct-mode 1); orgstruct-heading-prefix-regexp:"-- "; -*-
|
||||
-- ** doc
|
||||
-- In Emacs, use TAB on lines beginning with "-- *" to collapse/expand sections.
|
||||
{-|
|
||||
|
||||
A reader for CSV data, using an extra rules file to help interpret the data.
|
||||
@ -7,36 +10,40 @@ A reader for CSV data, using an extra rules file to help interpret the data.
|
||||
-- Here's a command that will render them:
|
||||
-- stack haddock hledger-lib --fast --no-haddock-deps --haddock-arguments='--ignore-all-exports' --open
|
||||
|
||||
-- ** language
|
||||
{-# LANGUAGE CPP #-}
|
||||
{-# LANGUAGE FlexibleContexts #-}
|
||||
{-# LANGUAGE ScopedTypeVariables #-}
|
||||
{-# LANGUAGE TypeFamilies #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE ViewPatterns #-}
|
||||
{-# LANGUAGE TypeSynonymInstances #-}
|
||||
{-# LANGUAGE FlexibleInstances #-}
|
||||
{-# LANGUAGE PackageImports #-}
|
||||
{-# LANGUAGE MultiWayIf #-}
|
||||
{-# LANGUAGE NamedFieldPuns #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE PackageImports #-}
|
||||
{-# LANGUAGE RecordWildCards #-}
|
||||
{-# LANGUAGE ScopedTypeVariables #-}
|
||||
{-# LANGUAGE TypeFamilies #-}
|
||||
{-# LANGUAGE TypeSynonymInstances #-}
|
||||
{-# LANGUAGE ViewPatterns #-}
|
||||
|
||||
-- ** doctest setup
|
||||
-- $setup
|
||||
-- >>> :set -XOverloadedStrings
|
||||
|
||||
-- ** exports
|
||||
module Hledger.Read.CsvReader (
|
||||
-- * Reader
|
||||
reader,
|
||||
-- * Misc.
|
||||
CsvRecord,
|
||||
CSV, Record, Field,
|
||||
-- rules,
|
||||
CSV, CsvRecord, CsvValue,
|
||||
csvFileFor,
|
||||
rulesFileFor,
|
||||
parseRulesFile,
|
||||
parseAndValidateCsvRules,
|
||||
expandIncludes,
|
||||
transactionFromCsvRecord,
|
||||
printCSV,
|
||||
-- * Tests
|
||||
tests_CsvReader,
|
||||
)
|
||||
where
|
||||
|
||||
-- ** imports
|
||||
import Prelude ()
|
||||
import "base-compat-batteries" Prelude.Compat hiding (fail)
|
||||
import qualified "base-compat-batteries" Control.Monad.Fail.Compat as Fail (fail)
|
||||
@ -80,11 +87,13 @@ import Hledger.Data
|
||||
import Hledger.Utils
|
||||
import Hledger.Read.Common (Reader(..),InputOpts(..),amountp, statusp, genericSourcePos, finaliseJournal)
|
||||
|
||||
type CSV = [Record]
|
||||
-- ** some types
|
||||
|
||||
type Record = [Field]
|
||||
type CSV = [CsvRecord]
|
||||
type CsvRecord = [CsvValue]
|
||||
type CsvValue = String
|
||||
|
||||
type Field = String
|
||||
-- ** reader
|
||||
|
||||
reader :: Reader
|
||||
reader = Reader
|
||||
@ -110,185 +119,23 @@ parse iopts f t = do
|
||||
-- better preemptively reverse them once more. XXX inefficient
|
||||
pj' = journalReverse pj
|
||||
|
||||
-- | Parse special separator names TAB and SPACE, or return the first
|
||||
-- character. Return Nothing on empty string
|
||||
parseSeparator :: String -> Maybe Char
|
||||
parseSeparator = specials . map toLower
|
||||
where specials "space" = Just ' '
|
||||
specials "tab" = Just '\t'
|
||||
specials (x:_) = Just x
|
||||
specials [] = Nothing
|
||||
-- ** reading rules files
|
||||
-- *** rules utilities
|
||||
|
||||
-- | Read a Journal from the given CSV data (and filename, used for error
|
||||
-- messages), or return an error. Proceed as follows:
|
||||
--
|
||||
-- 1. parse CSV conversion rules from the specified rules file, or from
|
||||
-- the default rules file for the specified CSV file, if it exists,
|
||||
-- or throw a parse error; if it doesn't exist, use built-in default rules
|
||||
--
|
||||
-- 2. parse the CSV data, or throw a parse error
|
||||
--
|
||||
-- 3. convert the CSV records to transactions using the rules
|
||||
--
|
||||
-- 4. if the rules file didn't exist, create it with the default rules and filename
|
||||
--
|
||||
-- 5. return the transactions as a Journal
|
||||
--
|
||||
readJournalFromCsv :: Maybe FilePath -> FilePath -> Text -> IO (Either String Journal)
|
||||
readJournalFromCsv Nothing "-" _ = return $ Left "please use --rules-file when reading CSV from stdin"
|
||||
readJournalFromCsv mrulesfile csvfile csvdata =
|
||||
handle (\(e::IOException) -> return $ Left $ show e) $ do
|
||||
-- Not used by hledger; just for lib users,
|
||||
-- | An pure-exception-throwing IO action that parses this file's content
|
||||
-- as CSV conversion rules, interpolating any included files first,
|
||||
-- and runs some extra validation checks.
|
||||
parseRulesFile :: FilePath -> ExceptT String IO CsvRules
|
||||
parseRulesFile f =
|
||||
liftIO (readFilePortably f >>= expandIncludes (takeDirectory f))
|
||||
>>= either throwError return . parseAndValidateCsvRules f
|
||||
|
||||
-- make and throw an IO exception.. which we catch and convert to an Either above ?
|
||||
let throwerr = throw . userError
|
||||
|
||||
-- parse the csv rules
|
||||
let rulesfile = fromMaybe (rulesFileFor csvfile) mrulesfile
|
||||
rulesfileexists <- doesFileExist rulesfile
|
||||
rulestext <-
|
||||
if rulesfileexists
|
||||
then do
|
||||
dbg1IO "using conversion rules file" rulesfile
|
||||
readFilePortably rulesfile >>= expandIncludes (takeDirectory rulesfile)
|
||||
else
|
||||
return $ defaultRulesText rulesfile
|
||||
rules <- either throwerr return $ parseAndValidateCsvRules rulesfile rulestext
|
||||
dbg2IO "rules" rules
|
||||
|
||||
-- parse the skip directive's value, if any
|
||||
let skiplines = case getDirective "skip" rules of
|
||||
Nothing -> 0
|
||||
Just "" -> 1
|
||||
Just s -> readDef (throwerr $ "could not parse skip value: " ++ show s) s
|
||||
|
||||
-- parse csv
|
||||
-- parsec seems to fail if you pass it "-" here TODO: try again with megaparsec
|
||||
let parsecfilename = if csvfile == "-" then "(stdin)" else csvfile
|
||||
let separator = fromMaybe ',' (getDirective "separator" rules >>= parseSeparator)
|
||||
dbg2IO "separator" separator
|
||||
records <- (either throwerr id .
|
||||
dbg2 "validateCsv" . validateCsv rules skiplines .
|
||||
dbg2 "parseCsv")
|
||||
`fmap` parseCsv separator parsecfilename csvdata
|
||||
dbg1IO "first 3 csv records" $ take 3 records
|
||||
|
||||
-- identify header lines
|
||||
-- let (headerlines, datalines) = identifyHeaderLines records
|
||||
-- mfieldnames = lastMay headerlines
|
||||
|
||||
let
|
||||
-- convert CSV records to transactions
|
||||
txns = snd $ mapAccumL
|
||||
(\pos r ->
|
||||
let
|
||||
SourcePos name line col = pos
|
||||
line' = (mkPos . (+1) . unPos) line
|
||||
pos' = SourcePos name line' col
|
||||
in
|
||||
(pos, transactionFromCsvRecord pos' rules r)
|
||||
)
|
||||
(initialPos parsecfilename) records
|
||||
|
||||
-- Ensure transactions are ordered chronologically.
|
||||
-- First, if the CSV records seem to be most-recent-first (because
|
||||
-- there's an explicit "newest-first" directive, or there's more
|
||||
-- than one date and the first date is more recent than the last):
|
||||
-- reverse them to get same-date transactions ordered chronologically.
|
||||
txns' =
|
||||
(if newestfirst || mseemsnewestfirst == Just True then reverse else id) txns
|
||||
where
|
||||
newestfirst = dbg3 "newestfirst" $ isJust $ getDirective "newest-first" rules
|
||||
mseemsnewestfirst = dbg3 "mseemsnewestfirst" $
|
||||
case nub $ map tdate txns of
|
||||
ds | length ds > 1 -> Just $ head ds > last ds
|
||||
_ -> Nothing
|
||||
-- Second, sort by date.
|
||||
txns'' = sortBy (comparing tdate) txns'
|
||||
|
||||
when (not rulesfileexists) $ do
|
||||
dbg1IO "creating conversion rules file" rulesfile
|
||||
writeFile rulesfile $ T.unpack rulestext
|
||||
|
||||
return $ Right nulljournal{jtxns=txns''}
|
||||
|
||||
parseCsv :: Char -> FilePath -> Text -> IO (Either String CSV)
|
||||
parseCsv separator filePath csvdata =
|
||||
case filePath of
|
||||
"-" -> liftM (parseCassava separator "(stdin)") T.getContents
|
||||
_ -> return $ parseCassava separator filePath csvdata
|
||||
|
||||
parseCassava :: Char -> FilePath -> Text -> Either String CSV
|
||||
parseCassava separator path content =
|
||||
either (Left . errorBundlePretty) (Right . parseResultToCsv) <$>
|
||||
CassavaMP.decodeWith (decodeOptions separator) Cassava.NoHeader path $
|
||||
BL.fromStrict $ T.encodeUtf8 content
|
||||
|
||||
decodeOptions :: Char -> Cassava.DecodeOptions
|
||||
decodeOptions separator = Cassava.defaultDecodeOptions {
|
||||
Cassava.decDelimiter = fromIntegral (ord separator)
|
||||
}
|
||||
|
||||
parseResultToCsv :: (Foldable t, Functor t) => t (t B.ByteString) -> CSV
|
||||
parseResultToCsv = toListList . unpackFields
|
||||
where
|
||||
toListList = toList . fmap toList
|
||||
unpackFields = (fmap . fmap) (T.unpack . T.decodeUtf8)
|
||||
|
||||
printCSV :: CSV -> String
|
||||
printCSV records = unlined (printRecord `map` records)
|
||||
where printRecord = concat . intersperse "," . map printField
|
||||
printField f = "\"" ++ concatMap escape f ++ "\""
|
||||
escape '"' = "\"\""
|
||||
escape x = [x]
|
||||
unlined = concat . intersperse "\n"
|
||||
|
||||
-- | Return the cleaned up and validated CSV data (can be empty), or an error.
|
||||
validateCsv :: CsvRules -> Int -> Either String CSV -> Either String [CsvRecord]
|
||||
validateCsv _ _ (Left err) = Left err
|
||||
validateCsv rules numhdrlines (Right rs) = validate $ applyConditionalSkips $ drop numhdrlines $ filternulls rs
|
||||
where
|
||||
filternulls = filter (/=[""])
|
||||
skipCount r =
|
||||
case (getEffectiveAssignment rules r "end", getEffectiveAssignment rules r "skip") of
|
||||
(Nothing, Nothing) -> Nothing
|
||||
(Just _, _) -> Just maxBound
|
||||
(Nothing, Just "") -> Just 1
|
||||
(Nothing, Just x) -> Just (read x)
|
||||
applyConditionalSkips [] = []
|
||||
applyConditionalSkips (r:rest) =
|
||||
case skipCount r of
|
||||
Nothing -> r:(applyConditionalSkips rest)
|
||||
Just cnt -> applyConditionalSkips (drop (cnt-1) rest)
|
||||
validate [] = Right []
|
||||
validate rs@(_first:_)
|
||||
| isJust lessthan2 = let r = fromJust lessthan2 in
|
||||
Left $ printf "CSV record %s has less than two fields" (show r)
|
||||
| otherwise = Right rs
|
||||
where
|
||||
lessthan2 = headMay $ filter ((<2).length) rs
|
||||
|
||||
-- -- | The highest (0-based) field index referenced in the field
|
||||
-- -- definitions, or -1 if no fields are defined.
|
||||
-- maxFieldIndex :: CsvRules -> Int
|
||||
-- maxFieldIndex r = maximumDef (-1) $ catMaybes [
|
||||
-- dateField r
|
||||
-- ,statusField r
|
||||
-- ,codeField r
|
||||
-- ,amountField r
|
||||
-- ,amountInField r
|
||||
-- ,amountOutField r
|
||||
-- ,currencyField r
|
||||
-- ,accountField r
|
||||
-- ,account2Field r
|
||||
-- ,date2Field r
|
||||
-- ]
|
||||
|
||||
-- rulesFileFor :: CliOpts -> FilePath -> FilePath
|
||||
-- rulesFileFor CliOpts{rules_file_=Just f} _ = f
|
||||
-- rulesFileFor CliOpts{rules_file_=Nothing} csvfile = replaceExtension csvfile ".rules"
|
||||
-- | Given a CSV file path, what would normally be the corresponding rules file ?
|
||||
rulesFileFor :: FilePath -> FilePath
|
||||
rulesFileFor = (++ ".rules")
|
||||
|
||||
-- | Given a CSV rules file path, what would normally be the corresponding CSV file ?
|
||||
csvFileFor :: FilePath -> FilePath
|
||||
csvFileFor = reverse . drop 6 . reverse
|
||||
|
||||
@ -317,8 +164,143 @@ defaultRulesText csvfile = T.pack $ unlines
|
||||
," account2 assets:bank:savings\n"
|
||||
]
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
-- Conversion rules parsing
|
||||
addDirective :: (DirectiveName, String) -> CsvRules -> CsvRules
|
||||
addDirective d r = r{rdirectives=d:rdirectives r}
|
||||
|
||||
addAssignment :: (HledgerFieldName, FieldTemplate) -> CsvRules -> CsvRules
|
||||
addAssignment a r = r{rassignments=a:rassignments r}
|
||||
|
||||
setIndexesAndAssignmentsFromList :: [CsvFieldName] -> CsvRules -> CsvRules
|
||||
setIndexesAndAssignmentsFromList fs r = addAssignmentsFromList fs . setCsvFieldIndexesFromList fs $ r
|
||||
|
||||
setCsvFieldIndexesFromList :: [CsvFieldName] -> CsvRules -> CsvRules
|
||||
setCsvFieldIndexesFromList fs r = r{rcsvfieldindexes=zip fs [1..]}
|
||||
|
||||
addAssignmentsFromList :: [CsvFieldName] -> CsvRules -> CsvRules
|
||||
addAssignmentsFromList fs r = foldl' maybeAddAssignment r journalfieldnames
|
||||
where
|
||||
maybeAddAssignment rules f = (maybe id addAssignmentFromIndex $ elemIndex f fs) rules
|
||||
where
|
||||
addAssignmentFromIndex i = addAssignment (f, "%"++show (i+1))
|
||||
|
||||
addConditionalBlock :: ConditionalBlock -> CsvRules -> CsvRules
|
||||
addConditionalBlock b r = r{rconditionalblocks=b:rconditionalblocks r}
|
||||
|
||||
getDirective :: DirectiveName -> CsvRules -> Maybe FieldTemplate
|
||||
getDirective directivename = lookup directivename . rdirectives
|
||||
|
||||
instance ShowErrorComponent String where
|
||||
showErrorComponent = id
|
||||
|
||||
-- | Inline all files referenced by include directives in this hledger CSV rules text, recursively.
|
||||
-- Included file paths may be relative to the directory of the provided file path.
|
||||
-- This is done as a pre-parse step to simplify the CSV rules parser.
|
||||
expandIncludes :: FilePath -> Text -> IO Text
|
||||
expandIncludes dir content = mapM (expandLine dir) (T.lines content) >>= return . T.unlines
|
||||
where
|
||||
expandLine dir line =
|
||||
case line of
|
||||
(T.stripPrefix "include " -> Just f) -> expandIncludes dir' =<< T.readFile f'
|
||||
where
|
||||
f' = dir </> dropWhile isSpace (T.unpack f)
|
||||
dir' = takeDirectory f'
|
||||
_ -> return line
|
||||
|
||||
-- | An error-throwing IO action that parses this text as CSV conversion rules
|
||||
-- and runs some extra validation checks. The file path is used in error messages.
|
||||
parseAndValidateCsvRules :: FilePath -> T.Text -> Either String CsvRules
|
||||
parseAndValidateCsvRules rulesfile s =
|
||||
case parseCsvRules rulesfile s of
|
||||
Left err -> Left $ customErrorBundlePretty err
|
||||
Right rules -> first makeFancyParseError $ validateRules rules
|
||||
where
|
||||
makeFancyParseError :: String -> String
|
||||
makeFancyParseError errorString =
|
||||
parseErrorPretty (FancyError 0 (S.singleton $ ErrorFail errorString) :: ParseError Text String)
|
||||
|
||||
-- | Parse this text as CSV conversion rules. The file path is for error messages.
|
||||
parseCsvRules :: FilePath -> T.Text -> Either (ParseErrorBundle T.Text CustomErr) CsvRules
|
||||
-- parseCsvRules rulesfile s = runParser csvrulesfile nullrules{baseAccount=takeBaseName rulesfile} rulesfile s
|
||||
parseCsvRules rulesfile s =
|
||||
runParser (evalStateT rulesp defrules) rulesfile s
|
||||
|
||||
-- | Return the validated rules, or an error.
|
||||
validateRules :: CsvRules -> Either String CsvRules
|
||||
validateRules rules = do
|
||||
unless (isAssigned "date") $ Left "Please specify (at top level) the date field. Eg: date %1\n"
|
||||
Right rules
|
||||
where
|
||||
isAssigned f = isJust $ getEffectiveAssignment rules [] f
|
||||
|
||||
-- *** rules types
|
||||
|
||||
-- | A set of data definitions and account-matching patterns sufficient to
|
||||
-- convert a particular CSV data file into meaningful journal transactions.
|
||||
data CsvRules = CsvRules {
|
||||
rdirectives :: [(DirectiveName,String)],
|
||||
-- ^ top-level rules, as (keyword, value) pairs
|
||||
rcsvfieldindexes :: [(CsvFieldName, CsvFieldIndex)],
|
||||
-- ^ csv field names and their column number, if declared by a fields list
|
||||
rassignments :: [(HledgerFieldName, FieldTemplate)],
|
||||
-- ^ top-level assignments to hledger fields, as (field name, value template) pairs
|
||||
rconditionalblocks :: [ConditionalBlock]
|
||||
-- ^ conditional blocks, which containing additional assignments/rules to apply to matched csv records
|
||||
} deriving (Show, Eq)
|
||||
|
||||
type CsvRulesParser a = StateT CsvRules SimpleTextParser a
|
||||
|
||||
-- | The keyword of a CSV rule - "fields", "skip", "if", etc.
|
||||
type DirectiveName = String
|
||||
|
||||
-- | CSV field name.
|
||||
type CsvFieldName = String
|
||||
|
||||
-- | 1-based CSV column number.
|
||||
type CsvFieldIndex = Int
|
||||
|
||||
-- | Percent symbol followed by a CSV field name or column number. Eg: %date, %1.
|
||||
type CsvFieldReference = String
|
||||
|
||||
-- | One of the standard hledger fields or pseudo-fields that can be assigned to.
|
||||
-- Eg date, account1, amount, amount1-in, date-format.
|
||||
type HledgerFieldName = String
|
||||
|
||||
-- | A text value to be assigned to a hledger field, possibly
|
||||
-- containing csv field references to be interpolated.
|
||||
type FieldTemplate = String
|
||||
|
||||
-- | A strptime date parsing pattern, as supported by Data.Time.Format.
|
||||
type DateFormat = String
|
||||
|
||||
-- | A regular expression.
|
||||
type RegexpPattern = String
|
||||
|
||||
-- | A single test for matching a CSV record, in one way or another.
|
||||
data Matcher =
|
||||
RecordMatcher RegexpPattern -- ^ match if this regexp matches the overall CSV record
|
||||
| FieldMatcher CsvFieldReference RegexpPattern -- ^ match if this regexp matches the referenced CSV field's value
|
||||
deriving (Show, Eq)
|
||||
|
||||
-- | A conditional block: a set of CSV record matchers, and a sequence
|
||||
-- of rules which will be enabled only if one or more of the matchers
|
||||
-- succeeds.
|
||||
--
|
||||
-- Three types of rule are allowed inside conditional blocks: field
|
||||
-- assignments, skip, end. (A skip or end rule is stored as if it was
|
||||
-- a field assignment, and executed in validateCsv. XXX)
|
||||
data ConditionalBlock = CB {
|
||||
cbMatchers :: [Matcher]
|
||||
,cbAssignments :: [(HledgerFieldName, FieldTemplate)]
|
||||
} deriving (Show, Eq)
|
||||
|
||||
defrules = CsvRules {
|
||||
rdirectives=[],
|
||||
rcsvfieldindexes=[],
|
||||
rassignments=[],
|
||||
rconditionalblocks=[]
|
||||
}
|
||||
|
||||
-- *** rules parsers
|
||||
|
||||
{-
|
||||
Grammar for the CSV conversion rules, more or less:
|
||||
@ -383,146 +365,6 @@ DIGIT: 0-9
|
||||
|
||||
-}
|
||||
|
||||
{- |
|
||||
A set of data definitions and account-matching patterns sufficient to
|
||||
convert a particular CSV data file into meaningful journal transactions.
|
||||
-}
|
||||
data CsvRules = CsvRules {
|
||||
rdirectives :: [(DirectiveName,String)],
|
||||
-- ^ top-level rules, as (keyword, value) pairs
|
||||
rcsvfieldindexes :: [(CsvFieldName, CsvFieldIndex)],
|
||||
-- ^ csv field names and their column number, if declared by a fields list
|
||||
rassignments :: [(HledgerFieldName, FieldTemplate)],
|
||||
-- ^ top-level assignments to hledger fields, as (field name, value template) pairs
|
||||
rconditionalblocks :: [ConditionalBlock]
|
||||
-- ^ conditional blocks, which containing additional assignments/rules to apply to matched csv records
|
||||
} deriving (Show, Eq)
|
||||
|
||||
type CsvRulesParser a = StateT CsvRules SimpleTextParser a
|
||||
|
||||
-- | The keyword of a CSV rule - "fields", "skip", "if", etc.
|
||||
type DirectiveName = String
|
||||
-- | CSV field name.
|
||||
type CsvFieldName = String
|
||||
-- | 1-based CSV column number.
|
||||
type CsvFieldIndex = Int
|
||||
-- | Percent symbol followed by a CSV field name or column number. Eg: %date, %1.
|
||||
type CsvFieldReference = String
|
||||
-- | One of the standard hledger fields or pseudo-fields that can be assigned to.
|
||||
-- Eg date, account1, amount, amount1-in, date-format.
|
||||
type HledgerFieldName = String
|
||||
-- | A text value to be assigned to a hledger field, possibly
|
||||
-- containing csv field references to be interpolated.
|
||||
type FieldTemplate = String
|
||||
-- | A strptime date parsing pattern, as supported by Data.Time.Format.
|
||||
type DateFormat = String
|
||||
-- | A regular expression.
|
||||
type RegexpPattern = String
|
||||
|
||||
-- | A single test for matching a CSV record, in one way or another.
|
||||
data Matcher =
|
||||
RecordMatcher RegexpPattern -- ^ match if this regexp matches the overall CSV record
|
||||
| FieldMatcher CsvFieldReference RegexpPattern -- ^ match if this regexp matches the referenced CSV field's value
|
||||
deriving (Show, Eq)
|
||||
|
||||
-- | A conditional block: a set of CSV record matchers, and a sequence
|
||||
-- of rules which will be enabled only if one or more of the matchers
|
||||
-- succeeds.
|
||||
--
|
||||
-- Three types of rule are allowed inside conditional blocks: field
|
||||
-- assignments, skip, end. (A skip or end rule is stored as if it was
|
||||
-- a field assignment, and executed in validateCsv. XXX)
|
||||
data ConditionalBlock = CB {
|
||||
cbMatchers :: [Matcher]
|
||||
,cbAssignments :: [(HledgerFieldName, FieldTemplate)]
|
||||
} deriving (Show, Eq)
|
||||
|
||||
defrules = CsvRules {
|
||||
rdirectives=[],
|
||||
rcsvfieldindexes=[],
|
||||
rassignments=[],
|
||||
rconditionalblocks=[]
|
||||
}
|
||||
|
||||
addDirective :: (DirectiveName, String) -> CsvRules -> CsvRules
|
||||
addDirective d r = r{rdirectives=d:rdirectives r}
|
||||
|
||||
addAssignment :: (HledgerFieldName, FieldTemplate) -> CsvRules -> CsvRules
|
||||
addAssignment a r = r{rassignments=a:rassignments r}
|
||||
|
||||
setIndexesAndAssignmentsFromList :: [CsvFieldName] -> CsvRules -> CsvRules
|
||||
setIndexesAndAssignmentsFromList fs r = addAssignmentsFromList fs . setCsvFieldIndexesFromList fs $ r
|
||||
|
||||
setCsvFieldIndexesFromList :: [CsvFieldName] -> CsvRules -> CsvRules
|
||||
setCsvFieldIndexesFromList fs r = r{rcsvfieldindexes=zip fs [1..]}
|
||||
|
||||
addAssignmentsFromList :: [CsvFieldName] -> CsvRules -> CsvRules
|
||||
addAssignmentsFromList fs r = foldl' maybeAddAssignment r journalfieldnames
|
||||
where
|
||||
maybeAddAssignment rules f = (maybe id addAssignmentFromIndex $ elemIndex f fs) rules
|
||||
where
|
||||
addAssignmentFromIndex i = addAssignment (f, "%"++show (i+1))
|
||||
|
||||
addConditionalBlock :: ConditionalBlock -> CsvRules -> CsvRules
|
||||
addConditionalBlock b r = r{rconditionalblocks=b:rconditionalblocks r}
|
||||
|
||||
getDirective :: DirectiveName -> CsvRules -> Maybe FieldTemplate
|
||||
getDirective directivename = lookup directivename . rdirectives
|
||||
|
||||
instance ShowErrorComponent String where
|
||||
showErrorComponent = id
|
||||
|
||||
-- Not used by hledger; just for lib users,
|
||||
-- | An pure-exception-throwing IO action that parses this file's content
|
||||
-- as CSV conversion rules, interpolating any included files first,
|
||||
-- and runs some extra validation checks.
|
||||
parseRulesFile :: FilePath -> ExceptT String IO CsvRules
|
||||
parseRulesFile f =
|
||||
liftIO (readFilePortably f >>= expandIncludes (takeDirectory f))
|
||||
>>= either throwError return . parseAndValidateCsvRules f
|
||||
|
||||
-- | Inline all files referenced by include directives in this hledger CSV rules text, recursively.
|
||||
-- Included file paths may be relative to the directory of the provided file path.
|
||||
-- This is done as a pre-parse step to simplify the CSV rules parser.
|
||||
expandIncludes :: FilePath -> Text -> IO Text
|
||||
expandIncludes dir content = mapM (expandLine dir) (T.lines content) >>= return . T.unlines
|
||||
where
|
||||
expandLine dir line =
|
||||
case line of
|
||||
(T.stripPrefix "include " -> Just f) -> expandIncludes dir' =<< T.readFile f'
|
||||
where
|
||||
f' = dir </> dropWhile isSpace (T.unpack f)
|
||||
dir' = takeDirectory f'
|
||||
_ -> return line
|
||||
|
||||
-- | An error-throwing IO action that parses this text as CSV conversion rules
|
||||
-- and runs some extra validation checks. The file path is used in error messages.
|
||||
parseAndValidateCsvRules :: FilePath -> T.Text -> Either String CsvRules
|
||||
parseAndValidateCsvRules rulesfile s =
|
||||
case parseCsvRules rulesfile s of
|
||||
Left err -> Left $ customErrorBundlePretty err
|
||||
Right rules -> first makeFancyParseError $ validateRules rules
|
||||
where
|
||||
makeFancyParseError :: String -> String
|
||||
makeFancyParseError errorString =
|
||||
parseErrorPretty (FancyError 0 (S.singleton $ ErrorFail errorString) :: ParseError Text String)
|
||||
|
||||
-- | Parse this text as CSV conversion rules. The file path is for error messages.
|
||||
parseCsvRules :: FilePath -> T.Text -> Either (ParseErrorBundle T.Text CustomErr) CsvRules
|
||||
-- parseCsvRules rulesfile s = runParser csvrulesfile nullrules{baseAccount=takeBaseName rulesfile} rulesfile s
|
||||
parseCsvRules rulesfile s =
|
||||
runParser (evalStateT rulesp defrules) rulesfile s
|
||||
|
||||
-- | Return the validated rules, or an error.
|
||||
validateRules :: CsvRules -> Either String CsvRules
|
||||
validateRules rules = do
|
||||
unless (isAssigned "date") $ Left "Please specify (at top level) the date field. Eg: date %1\n"
|
||||
Right rules
|
||||
where
|
||||
isAssigned f = isJust $ getEffectiveAssignment rules [] f
|
||||
|
||||
-- parsers
|
||||
|
||||
rulesp :: CsvRulesParser CsvRules
|
||||
rulesp = do
|
||||
_ <- many $ choiceInState
|
||||
@ -564,13 +406,10 @@ directives :: [String]
|
||||
directives =
|
||||
["date-format"
|
||||
,"separator"
|
||||
-- ,"default-account1"
|
||||
-- ,"default-account"
|
||||
-- ,"default-currency"
|
||||
-- ,"skip-lines" -- old
|
||||
,"skip"
|
||||
,"newest-first"
|
||||
-- ,"base-account"
|
||||
-- ,"base-currency"
|
||||
, "balance-type"
|
||||
]
|
||||
|
||||
@ -734,10 +573,182 @@ regexp = do
|
||||
-- -- ,"!="
|
||||
-- ]
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
-- Converting CSV records to journal transactions
|
||||
-- ** reading csv files
|
||||
|
||||
type CsvRecord = [String]
|
||||
-- | Read a Journal from the given CSV data (and filename, used for error
|
||||
-- messages), or return an error. Proceed as follows:
|
||||
--
|
||||
-- 1. parse CSV conversion rules from the specified rules file, or from
|
||||
-- the default rules file for the specified CSV file, if it exists,
|
||||
-- or throw a parse error; if it doesn't exist, use built-in default rules
|
||||
--
|
||||
-- 2. parse the CSV data, or throw a parse error
|
||||
--
|
||||
-- 3. convert the CSV records to transactions using the rules
|
||||
--
|
||||
-- 4. if the rules file didn't exist, create it with the default rules and filename
|
||||
--
|
||||
-- 5. return the transactions as a Journal
|
||||
--
|
||||
readJournalFromCsv :: Maybe FilePath -> FilePath -> Text -> IO (Either String Journal)
|
||||
readJournalFromCsv Nothing "-" _ = return $ Left "please use --rules-file when reading CSV from stdin"
|
||||
readJournalFromCsv mrulesfile csvfile csvdata =
|
||||
handle (\(e::IOException) -> return $ Left $ show e) $ do
|
||||
|
||||
-- make and throw an IO exception.. which we catch and convert to an Either above ?
|
||||
let throwerr = throw . userError
|
||||
|
||||
-- parse the csv rules
|
||||
let rulesfile = fromMaybe (rulesFileFor csvfile) mrulesfile
|
||||
rulesfileexists <- doesFileExist rulesfile
|
||||
rulestext <-
|
||||
if rulesfileexists
|
||||
then do
|
||||
dbg1IO "using conversion rules file" rulesfile
|
||||
readFilePortably rulesfile >>= expandIncludes (takeDirectory rulesfile)
|
||||
else
|
||||
return $ defaultRulesText rulesfile
|
||||
rules <- either throwerr return $ parseAndValidateCsvRules rulesfile rulestext
|
||||
dbg2IO "rules" rules
|
||||
|
||||
-- parse the skip directive's value, if any
|
||||
let skiplines = case getDirective "skip" rules of
|
||||
Nothing -> 0
|
||||
Just "" -> 1
|
||||
Just s -> readDef (throwerr $ "could not parse skip value: " ++ show s) s
|
||||
|
||||
-- parse csv
|
||||
-- parsec seems to fail if you pass it "-" here TODO: try again with megaparsec
|
||||
let parsecfilename = if csvfile == "-" then "(stdin)" else csvfile
|
||||
let separator = fromMaybe ',' (getDirective "separator" rules >>= parseSeparator)
|
||||
dbg2IO "separator" separator
|
||||
records <- (either throwerr id .
|
||||
dbg2 "validateCsv" . validateCsv rules skiplines .
|
||||
dbg2 "parseCsv")
|
||||
`fmap` parseCsv separator parsecfilename csvdata
|
||||
dbg1IO "first 3 csv records" $ take 3 records
|
||||
|
||||
-- identify header lines
|
||||
-- let (headerlines, datalines) = identifyHeaderLines records
|
||||
-- mfieldnames = lastMay headerlines
|
||||
|
||||
let
|
||||
-- convert CSV records to transactions
|
||||
txns = snd $ mapAccumL
|
||||
(\pos r ->
|
||||
let
|
||||
SourcePos name line col = pos
|
||||
line' = (mkPos . (+1) . unPos) line
|
||||
pos' = SourcePos name line' col
|
||||
in
|
||||
(pos, transactionFromCsvRecord pos' rules r)
|
||||
)
|
||||
(initialPos parsecfilename) records
|
||||
|
||||
-- Ensure transactions are ordered chronologically.
|
||||
-- First, if the CSV records seem to be most-recent-first (because
|
||||
-- there's an explicit "newest-first" directive, or there's more
|
||||
-- than one date and the first date is more recent than the last):
|
||||
-- reverse them to get same-date transactions ordered chronologically.
|
||||
txns' =
|
||||
(if newestfirst || mseemsnewestfirst == Just True then reverse else id) txns
|
||||
where
|
||||
newestfirst = dbg3 "newestfirst" $ isJust $ getDirective "newest-first" rules
|
||||
mseemsnewestfirst = dbg3 "mseemsnewestfirst" $
|
||||
case nub $ map tdate txns of
|
||||
ds | length ds > 1 -> Just $ head ds > last ds
|
||||
_ -> Nothing
|
||||
-- Second, sort by date.
|
||||
txns'' = sortBy (comparing tdate) txns'
|
||||
|
||||
when (not rulesfileexists) $ do
|
||||
dbg1IO "creating conversion rules file" rulesfile
|
||||
writeFile rulesfile $ T.unpack rulestext
|
||||
|
||||
return $ Right nulljournal{jtxns=txns''}
|
||||
|
||||
-- | Parse special separator names TAB and SPACE, or return the first
|
||||
-- character. Return Nothing on empty string
|
||||
parseSeparator :: String -> Maybe Char
|
||||
parseSeparator = specials . map toLower
|
||||
where specials "space" = Just ' '
|
||||
specials "tab" = Just '\t'
|
||||
specials (x:_) = Just x
|
||||
specials [] = Nothing
|
||||
|
||||
parseCsv :: Char -> FilePath -> Text -> IO (Either String CSV)
|
||||
parseCsv separator filePath csvdata =
|
||||
case filePath of
|
||||
"-" -> liftM (parseCassava separator "(stdin)") T.getContents
|
||||
_ -> return $ parseCassava separator filePath csvdata
|
||||
|
||||
parseCassava :: Char -> FilePath -> Text -> Either String CSV
|
||||
parseCassava separator path content =
|
||||
either (Left . errorBundlePretty) (Right . parseResultToCsv) <$>
|
||||
CassavaMP.decodeWith (decodeOptions separator) Cassava.NoHeader path $
|
||||
BL.fromStrict $ T.encodeUtf8 content
|
||||
|
||||
decodeOptions :: Char -> Cassava.DecodeOptions
|
||||
decodeOptions separator = Cassava.defaultDecodeOptions {
|
||||
Cassava.decDelimiter = fromIntegral (ord separator)
|
||||
}
|
||||
|
||||
parseResultToCsv :: (Foldable t, Functor t) => t (t B.ByteString) -> CSV
|
||||
parseResultToCsv = toListList . unpackFields
|
||||
where
|
||||
toListList = toList . fmap toList
|
||||
unpackFields = (fmap . fmap) (T.unpack . T.decodeUtf8)
|
||||
|
||||
printCSV :: CSV -> String
|
||||
printCSV records = unlined (printRecord `map` records)
|
||||
where printRecord = concat . intersperse "," . map printField
|
||||
printField f = "\"" ++ concatMap escape f ++ "\""
|
||||
escape '"' = "\"\""
|
||||
escape x = [x]
|
||||
unlined = concat . intersperse "\n"
|
||||
|
||||
-- | Return the cleaned up and validated CSV data (can be empty), or an error.
|
||||
validateCsv :: CsvRules -> Int -> Either String CSV -> Either String [CsvRecord]
|
||||
validateCsv _ _ (Left err) = Left err
|
||||
validateCsv rules numhdrlines (Right rs) = validate $ applyConditionalSkips $ drop numhdrlines $ filternulls rs
|
||||
where
|
||||
filternulls = filter (/=[""])
|
||||
skipCount r =
|
||||
case (getEffectiveAssignment rules r "end", getEffectiveAssignment rules r "skip") of
|
||||
(Nothing, Nothing) -> Nothing
|
||||
(Just _, _) -> Just maxBound
|
||||
(Nothing, Just "") -> Just 1
|
||||
(Nothing, Just x) -> Just (read x)
|
||||
applyConditionalSkips [] = []
|
||||
applyConditionalSkips (r:rest) =
|
||||
case skipCount r of
|
||||
Nothing -> r:(applyConditionalSkips rest)
|
||||
Just cnt -> applyConditionalSkips (drop (cnt-1) rest)
|
||||
validate [] = Right []
|
||||
validate rs@(_first:_)
|
||||
| isJust lessthan2 = let r = fromJust lessthan2 in
|
||||
Left $ printf "CSV record %s has less than two fields" (show r)
|
||||
| otherwise = Right rs
|
||||
where
|
||||
lessthan2 = headMay $ filter ((<2).length) rs
|
||||
|
||||
-- -- | The highest (0-based) field index referenced in the field
|
||||
-- -- definitions, or -1 if no fields are defined.
|
||||
-- maxFieldIndex :: CsvRules -> Int
|
||||
-- maxFieldIndex r = maximumDef (-1) $ catMaybes [
|
||||
-- dateField r
|
||||
-- ,statusField r
|
||||
-- ,codeField r
|
||||
-- ,amountField r
|
||||
-- ,amountInField r
|
||||
-- ,amountOutField r
|
||||
-- ,currencyField r
|
||||
-- ,accountField r
|
||||
-- ,account2Field r
|
||||
-- ,date2Field r
|
||||
-- ]
|
||||
|
||||
-- ** converting csv records to transactions
|
||||
|
||||
showRules rules record =
|
||||
unlines $ catMaybes [ (("the "++fld++" rule is: ")++) <$> getEffectiveAssignment rules record fld | fld <- journalfieldnames]
|
||||
@ -940,6 +951,7 @@ mkPosting rules record number accountFld amountFld amountInFld amountOutFld bala
|
||||
-- XXX what's this needed for ? Test & document, or drop.
|
||||
-- Also, this the only place we interpolate in a keyword rule, I think.
|
||||
`withDefault` ruleval ("default-account" ++ number))
|
||||
-- XXX what's this needed for ? Test & document, or drop.
|
||||
mdefaultcurrency = rule "default-currency"
|
||||
currency = fromMaybe (fromMaybe "" mdefaultcurrency) $
|
||||
fieldval ("currency"++number) `withDefault` fieldval "currency"
|
||||
@ -985,7 +997,7 @@ improveUnknownAccountName p@Posting{..}
|
||||
-- possibly set by a balance-type rule.
|
||||
-- The CSV rules and current record are also provided, to be shown in case
|
||||
-- balance-type's argument is bad (XXX refactor).
|
||||
mkBalanceAssertion :: CsvRules -> Record -> (Amount, GenericSourcePos) -> BalanceAssertion
|
||||
mkBalanceAssertion :: CsvRules -> CsvRecord -> (Amount, GenericSourcePos) -> BalanceAssertion
|
||||
mkBalanceAssertion rules record (amt, pos) = assrt{baamount=amt, baposition=pos}
|
||||
where
|
||||
assrt =
|
||||
@ -1162,8 +1174,7 @@ parseDateWithCustomOrDefaultFormats mformat s = firstJust $ map parsewith format
|
||||
(:[])
|
||||
mformat
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
-- tests
|
||||
-- ** tests
|
||||
|
||||
tests_CsvReader = tests "CsvReader" [
|
||||
tests "parseCsvRules" [
|
||||
|
||||
@ -24,7 +24,7 @@ import Data.Maybe
|
||||
-- import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import System.Console.CmdArgs.Explicit
|
||||
import Hledger.Read.CsvReader (CSV, Record, printCSV)
|
||||
import Hledger.Read.CsvReader (CSV, CsvRecord, printCSV)
|
||||
|
||||
import Hledger
|
||||
import Hledger.Cli.CliOptions
|
||||
@ -71,7 +71,7 @@ postingsReportAsCsv (_,is) =
|
||||
:
|
||||
map postingsReportItemAsCsvRecord is
|
||||
|
||||
postingsReportItemAsCsvRecord :: PostingsReportItem -> Record
|
||||
postingsReportItemAsCsvRecord :: PostingsReportItem -> CsvRecord
|
||||
postingsReportItemAsCsvRecord (_, _, _, p, b) = [idx,date,code,desc,acct,amt,bal]
|
||||
where
|
||||
idx = show $ maybe 0 tindex $ ptransaction p
|
||||
|
||||
Loading…
Reference in New Issue
Block a user