imp: Support tsv and ssv prefixes (#2164)
This commit is contained in:
parent
a7c5225498
commit
d4ecdb3fea
@ -622,9 +622,38 @@ data Journal = Journal {
|
|||||||
-- The data is partial, and list fields are in reverse order.
|
-- The data is partial, and list fields are in reverse order.
|
||||||
type ParsedJournal = Journal
|
type ParsedJournal = Journal
|
||||||
|
|
||||||
|
-- | One of the standard *-separated value file types known by hledger,
|
||||||
|
data SepFormat
|
||||||
|
= Csv -- comma-separated
|
||||||
|
| Tsv -- tab-separated
|
||||||
|
| Ssv -- semicolon-separated
|
||||||
|
deriving Eq
|
||||||
|
|
||||||
-- | The id of a data format understood by hledger, eg @journal@ or @csv@.
|
-- | The id of a data format understood by hledger, eg @journal@ or @csv@.
|
||||||
-- The --output-format option selects one of these for output.
|
-- The --output-format option selects one of these for output.
|
||||||
type StorageFormat = String
|
data StorageFormat
|
||||||
|
= Rules
|
||||||
|
| Journal'
|
||||||
|
| Ledger'
|
||||||
|
| Timeclock
|
||||||
|
| Timedot
|
||||||
|
| Sep SepFormat
|
||||||
|
deriving Eq
|
||||||
|
|
||||||
|
instance Show SepFormat where
|
||||||
|
show Csv = "csv"
|
||||||
|
show Ssv = "ssv"
|
||||||
|
show Tsv = "tsv"
|
||||||
|
|
||||||
|
instance Show StorageFormat where
|
||||||
|
show Rules = "rules"
|
||||||
|
show Journal' = "journal"
|
||||||
|
show Ledger' = "ledger"
|
||||||
|
show Timeclock = "timeclock"
|
||||||
|
show Timedot = "timedot"
|
||||||
|
show (Sep Csv) = "csv"
|
||||||
|
show (Sep Ssv) = "ssv"
|
||||||
|
show (Sep Tsv) = "tsv"
|
||||||
|
|
||||||
-- | Extra information found in a payee directive.
|
-- | Extra information found in a payee directive.
|
||||||
data PayeeDeclarationInfo = PayeeDeclarationInfo {
|
data PayeeDeclarationInfo = PayeeDeclarationInfo {
|
||||||
|
|||||||
@ -190,7 +190,7 @@ data Reader m = Reader {
|
|||||||
,rParser :: MonadIO m => ErroringJournalParser m ParsedJournal
|
,rParser :: MonadIO m => ErroringJournalParser m ParsedJournal
|
||||||
}
|
}
|
||||||
|
|
||||||
instance Show (Reader m) where show r = rFormat r ++ " reader"
|
instance Show (Reader m) where show r = show (rFormat r) ++ " reader"
|
||||||
|
|
||||||
-- | Parse an InputOpts from a RawOpts and a provided date.
|
-- | Parse an InputOpts from a RawOpts and a provided date.
|
||||||
-- This will fail with a usage error if the forecast period expression cannot be parsed.
|
-- This will fail with a usage error if the forecast period expression cannot be parsed.
|
||||||
|
|||||||
@ -41,11 +41,11 @@ import Hledger.Read.RulesReader (readJournalFromCsv)
|
|||||||
|
|
||||||
--- ** reader
|
--- ** reader
|
||||||
|
|
||||||
reader :: MonadIO m => Reader m
|
reader :: MonadIO m => SepFormat -> Reader m
|
||||||
reader = Reader
|
reader sep = Reader
|
||||||
{rFormat = "csv"
|
{rFormat = Sep sep
|
||||||
,rExtensions = ["csv","tsv","ssv"]
|
,rExtensions = [show sep]
|
||||||
,rReadFn = parse
|
,rReadFn = parse sep
|
||||||
,rParser = error' "sorry, CSV files can't be included yet" -- PARTIAL:
|
,rParser = error' "sorry, CSV files can't be included yet" -- PARTIAL:
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -54,10 +54,10 @@ reader = Reader
|
|||||||
-- This file path is normally the CSV(/SSV/TSV) data file, and a corresponding rules file is inferred.
|
-- This file path is normally the CSV(/SSV/TSV) data file, and a corresponding rules file is inferred.
|
||||||
-- But it can also be the rules file, in which case the corresponding data file is inferred.
|
-- But it can also be the rules file, in which case the corresponding data file is inferred.
|
||||||
-- This does not check balance assertions.
|
-- This does not check balance assertions.
|
||||||
parse :: InputOpts -> FilePath -> Text -> ExceptT String IO Journal
|
parse :: SepFormat -> InputOpts -> FilePath -> Text -> ExceptT String IO Journal
|
||||||
parse iopts f t = do
|
parse sep iopts f t = do
|
||||||
let mrulesfile = mrules_file_ iopts
|
let mrulesfile = mrules_file_ iopts
|
||||||
readJournalFromCsv (Right <$> mrulesfile) f t
|
readJournalFromCsv (Right <$> mrulesfile) f t (Just sep)
|
||||||
-- apply any command line account aliases. Can fail with a bad replacement pattern.
|
-- apply any command line account aliases. Can fail with a bad replacement pattern.
|
||||||
>>= liftEither . journalApplyAliases (aliasesFromOpts iopts)
|
>>= liftEither . journalApplyAliases (aliasesFromOpts iopts)
|
||||||
-- journalFinalise assumes the journal's items are
|
-- journalFinalise assumes the journal's items are
|
||||||
|
|||||||
@ -139,21 +139,25 @@ readers' = [
|
|||||||
,TimeclockReader.reader
|
,TimeclockReader.reader
|
||||||
,TimedotReader.reader
|
,TimedotReader.reader
|
||||||
,RulesReader.reader
|
,RulesReader.reader
|
||||||
,CsvReader.reader
|
,CsvReader.reader Csv
|
||||||
|
,CsvReader.reader Tsv
|
||||||
|
,CsvReader.reader Ssv
|
||||||
-- ,LedgerReader.reader
|
-- ,LedgerReader.reader
|
||||||
]
|
]
|
||||||
|
|
||||||
readerNames :: [String]
|
readerNames :: [String]
|
||||||
readerNames = map rFormat (readers'::[Reader IO])
|
readerNames = map (show . rFormat) (readers'::[Reader IO])
|
||||||
|
|
||||||
-- | @findReader mformat mpath@
|
-- | @findReader mformat mpath@
|
||||||
--
|
--
|
||||||
-- Find the reader named by @mformat@, if provided.
|
-- Find the reader named by @mformat@, if provided.
|
||||||
|
-- ("ssv" and "tsv" are recognised as alternate names for the csv reader,
|
||||||
|
-- which also handles those formats.)
|
||||||
-- Or, if a file path is provided, find the first reader that handles
|
-- Or, if a file path is provided, find the first reader that handles
|
||||||
-- its file extension, if any.
|
-- its file extension, if any.
|
||||||
findReader :: MonadIO m => Maybe StorageFormat -> Maybe FilePath -> Maybe (Reader m)
|
findReader :: MonadIO m => Maybe StorageFormat -> Maybe FilePath -> Maybe (Reader m)
|
||||||
findReader Nothing Nothing = Nothing
|
findReader Nothing Nothing = Nothing
|
||||||
findReader (Just fmt) _ = headMay [r | r <- readers', rFormat r == fmt]
|
findReader (Just fmt) _ = headMay [r | r <- readers', let rname = rFormat r, rname == fmt]
|
||||||
findReader Nothing (Just path) =
|
findReader Nothing (Just path) =
|
||||||
case prefix of
|
case prefix of
|
||||||
Just fmt -> headMay [r | r <- readers', rFormat r == fmt]
|
Just fmt -> headMay [r | r <- readers', rFormat r == fmt]
|
||||||
@ -168,16 +172,27 @@ type PrefixedFilePath = FilePath
|
|||||||
|
|
||||||
-- | If a filepath is prefixed by one of the reader names and a colon,
|
-- | If a filepath is prefixed by one of the reader names and a colon,
|
||||||
-- split that off. Eg "csv:-" -> (Just "csv", "-").
|
-- split that off. Eg "csv:-" -> (Just "csv", "-").
|
||||||
splitReaderPrefix :: PrefixedFilePath -> (Maybe String, FilePath)
|
-- These reader prefixes can be used to force a specific reader,
|
||||||
|
-- overriding the file extension.
|
||||||
|
splitReaderPrefix :: PrefixedFilePath -> (Maybe StorageFormat, FilePath)
|
||||||
splitReaderPrefix f =
|
splitReaderPrefix f =
|
||||||
headDef (Nothing, f) $
|
let
|
||||||
[(Just r, drop (length r + 1) f) | r <- readerNames, (r++":") `isPrefixOf` f]
|
candidates = [(Just r, drop (length r + 1) f) | r <- readerNames ++ ["ssv","tsv"], (r++":") `isPrefixOf` f]
|
||||||
|
(strPrefix, newF) = headDef (Nothing, f) candidates
|
||||||
|
in case strPrefix of
|
||||||
|
Just "csv" -> (Just (Sep Csv), newF)
|
||||||
|
Just "tsv" -> (Just (Sep Tsv), newF)
|
||||||
|
Just "ssv" -> (Just (Sep Ssv), newF)
|
||||||
|
Just "journal" -> (Just Journal', newF)
|
||||||
|
Just "timeclock" -> (Just Timeclock, newF)
|
||||||
|
Just "timedot" -> (Just Timedot, newF)
|
||||||
|
_ -> (Nothing, f)
|
||||||
|
|
||||||
--- ** reader
|
--- ** reader
|
||||||
|
|
||||||
reader :: MonadIO m => Reader m
|
reader :: MonadIO m => Reader m
|
||||||
reader = Reader
|
reader = Reader
|
||||||
{rFormat = "journal"
|
{rFormat = Journal'
|
||||||
,rExtensions = ["journal", "j", "hledger", "ledger"]
|
,rExtensions = ["journal", "j", "hledger", "ledger"]
|
||||||
,rReadFn = parse
|
,rReadFn = parse
|
||||||
,rParser = journalp -- no need to add command line aliases like journalp'
|
,rParser = journalp -- no need to add command line aliases like journalp'
|
||||||
@ -282,7 +297,7 @@ includedirectivep = do
|
|||||||
paths <- getFilePaths parentoff parentpos glb
|
paths <- getFilePaths parentoff parentpos glb
|
||||||
let prefixedpaths = case mprefix of
|
let prefixedpaths = case mprefix of
|
||||||
Nothing -> paths
|
Nothing -> paths
|
||||||
Just fmt -> map ((fmt++":")++) paths
|
Just fmt -> map ((show fmt++":")++) paths
|
||||||
forM_ prefixedpaths $ parseChild parentpos
|
forM_ prefixedpaths $ parseChild parentpos
|
||||||
void newline
|
void newline
|
||||||
|
|
||||||
|
|||||||
@ -90,7 +90,7 @@ _READER__________________________________________ = undefined -- VSCode outline
|
|||||||
|
|
||||||
reader :: MonadIO m => Reader m
|
reader :: MonadIO m => Reader m
|
||||||
reader = Reader
|
reader = Reader
|
||||||
{rFormat = "rules"
|
{rFormat = Rules
|
||||||
,rExtensions = ["rules"]
|
,rExtensions = ["rules"]
|
||||||
,rReadFn = parse
|
,rReadFn = parse
|
||||||
,rParser = error' "sorry, rules files can't be included" -- PARTIAL:
|
,rParser = error' "sorry, rules files can't be included" -- PARTIAL:
|
||||||
@ -135,7 +135,7 @@ parse iopts f _ = do
|
|||||||
then return nulljournal -- data file inferred from rules file name was not found
|
then return nulljournal -- data file inferred from rules file name was not found
|
||||||
else do
|
else do
|
||||||
t <- liftIO $ readFileOrStdinPortably dat
|
t <- liftIO $ readFileOrStdinPortably dat
|
||||||
readJournalFromCsv (Just $ Left rules) dat t
|
readJournalFromCsv (Just $ Left rules) dat t Nothing
|
||||||
-- apply any command line account aliases. Can fail with a bad replacement pattern.
|
-- apply any command line account aliases. Can fail with a bad replacement pattern.
|
||||||
>>= liftEither . journalApplyAliases (aliasesFromOpts iopts)
|
>>= liftEither . journalApplyAliases (aliasesFromOpts iopts)
|
||||||
-- journalFinalise assumes the journal's items are
|
-- journalFinalise assumes the journal's items are
|
||||||
@ -855,9 +855,9 @@ _CSV_READING__________________________________________ = undefined
|
|||||||
--
|
--
|
||||||
-- 4. Return the transactions as a Journal.
|
-- 4. Return the transactions as a Journal.
|
||||||
--
|
--
|
||||||
readJournalFromCsv :: Maybe (Either CsvRules FilePath) -> FilePath -> Text -> ExceptT String IO Journal
|
readJournalFromCsv :: Maybe (Either CsvRules FilePath) -> FilePath -> Text -> Maybe SepFormat -> ExceptT String IO Journal
|
||||||
readJournalFromCsv Nothing "-" _ = throwError "please use --rules-file when reading CSV from stdin"
|
readJournalFromCsv Nothing "-" _ _ = throwError "please use --rules-file when reading CSV from stdin"
|
||||||
readJournalFromCsv merulesfile csvfile csvtext = do
|
readJournalFromCsv merulesfile csvfile csvtext sep = do
|
||||||
-- for now, correctness is the priority here, efficiency not so much
|
-- for now, correctness is the priority here, efficiency not so much
|
||||||
|
|
||||||
rules <- case merulesfile of
|
rules <- case merulesfile of
|
||||||
@ -879,14 +879,19 @@ readJournalFromCsv merulesfile csvfile csvtext = do
|
|||||||
-- convert back to text and parse as csv records
|
-- convert back to text and parse as csv records
|
||||||
let
|
let
|
||||||
csvtext1 = T.unlines csvlines2
|
csvtext1 = T.unlines csvlines2
|
||||||
separator =
|
-- The separator in the rules file takes precedence over the extension or prefix
|
||||||
case getDirective "separator" rules >>= parseSeparator of
|
separator = case getDirective "separator" rules >>= parseSeparator of
|
||||||
Just c -> c
|
Just c -> c
|
||||||
_ | ext == "ssv" -> ';'
|
_ | ext == "ssv" -> ';'
|
||||||
_ | ext == "tsv" -> '\t'
|
_ | ext == "tsv" -> '\t'
|
||||||
_ -> ','
|
_ ->
|
||||||
where
|
case sep of
|
||||||
ext = map toLower $ drop 1 $ takeExtension csvfile
|
Just Csv -> ','
|
||||||
|
Just Ssv -> ';'
|
||||||
|
Just Tsv -> '\t'
|
||||||
|
Nothing -> ','
|
||||||
|
where
|
||||||
|
ext = map toLower $ drop 1 $ takeExtension csvfile
|
||||||
-- parsec seemed to fail if you pass it "-" here -- TODO: try again with megaparsec
|
-- parsec seemed to fail if you pass it "-" here -- TODO: try again with megaparsec
|
||||||
parsecfilename = if csvfile == "-" then "(stdin)" else csvfile
|
parsecfilename = if csvfile == "-" then "(stdin)" else csvfile
|
||||||
dbg6IO "using separator" separator
|
dbg6IO "using separator" separator
|
||||||
|
|||||||
@ -77,7 +77,7 @@ import Data.Text as T (strip)
|
|||||||
|
|
||||||
reader :: MonadIO m => Reader m
|
reader :: MonadIO m => Reader m
|
||||||
reader = Reader
|
reader = Reader
|
||||||
{rFormat = "timeclock"
|
{rFormat = Timeclock
|
||||||
,rExtensions = ["timeclock"]
|
,rExtensions = ["timeclock"]
|
||||||
,rReadFn = parse
|
,rReadFn = parse
|
||||||
,rParser = timeclockfilep
|
,rParser = timeclockfilep
|
||||||
|
|||||||
@ -66,7 +66,7 @@ import Data.List (group)
|
|||||||
|
|
||||||
reader :: MonadIO m => Reader m
|
reader :: MonadIO m => Reader m
|
||||||
reader = Reader
|
reader = Reader
|
||||||
{rFormat = "timedot"
|
{rFormat = Timedot
|
||||||
,rExtensions = ["timedot"]
|
,rExtensions = ["timedot"]
|
||||||
,rReadFn = parse
|
,rReadFn = parse
|
||||||
,rParser = timedotp
|
,rParser = timedotp
|
||||||
|
|||||||
@ -625,7 +625,7 @@ expandPathPreservingPrefix d prefixedf = do
|
|||||||
let (p,f) = splitReaderPrefix prefixedf
|
let (p,f) = splitReaderPrefix prefixedf
|
||||||
f' <- expandPath d f
|
f' <- expandPath d f
|
||||||
return $ case p of
|
return $ case p of
|
||||||
Just p' -> p' ++ ":" ++ f'
|
Just p' -> (show p') ++ ":" ++ f'
|
||||||
Nothing -> f'
|
Nothing -> f'
|
||||||
|
|
||||||
-- | Get the expanded, absolute output file path specified by an
|
-- | Get the expanded, absolute output file path specified by an
|
||||||
|
|||||||
@ -1131,6 +1131,19 @@ $ ./csvtest.sh
|
|||||||
|
|
||||||
>=
|
>=
|
||||||
|
|
||||||
|
# ** 59. specify ssv prefix and no extension
|
||||||
|
<
|
||||||
|
12/11/2019;Foo;123;10.23
|
||||||
|
RULES
|
||||||
|
fields date, description, , amount
|
||||||
|
date-format %d/%m/%Y
|
||||||
|
$ ./ssvtest.sh
|
||||||
|
2019-11-12 Foo
|
||||||
|
expenses:unknown 10.23
|
||||||
|
income:unknown -10.23
|
||||||
|
|
||||||
|
>=
|
||||||
|
|
||||||
# ** .
|
# ** .
|
||||||
#<
|
#<
|
||||||
#$ ./csvtest.sh
|
#$ ./csvtest.sh
|
||||||
|
|||||||
22
hledger/test/ssvtest.sh
Executable file
22
hledger/test/ssvtest.sh
Executable file
@ -0,0 +1,22 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
#
|
||||||
|
# sh version, ported from bash so freebsd users can run these tests.
|
||||||
|
# This scripts expects stdin formatted like this:
|
||||||
|
# <multi-line ssv file (at least one line required, even if blank)>
|
||||||
|
# RULES
|
||||||
|
# <multi-line rules>
|
||||||
|
#
|
||||||
|
# Here, unlike in csvtest.sh, the ssv extension is intentionally NOT set
|
||||||
|
# This allows us to verify that the prefix detection is working
|
||||||
|
|
||||||
|
cat > t.$$.input
|
||||||
|
sed '1,/^RULES/d' t.$$.input > t.$$.rules
|
||||||
|
sed '/^RULES/,$d' t.$$.input > t.$$
|
||||||
|
|
||||||
|
trap 'rm -f t.$$.input t.$$ t.$$.rules t.$$.stderr' EXIT
|
||||||
|
|
||||||
|
# Remove variable file name from error messages
|
||||||
|
mkfifo t.$$.stderr
|
||||||
|
sed -Ee "s/t\.$$/input/" t.$$.stderr >&2 &
|
||||||
|
|
||||||
|
hledger -f ssv:t.$$ print "$@" 2> t.$$.stderr
|
||||||
Loading…
Reference in New Issue
Block a user