imp: Support tsv and ssv prefixes (#2164)
This commit is contained in:
parent
a7c5225498
commit
d4ecdb3fea
@ -622,9 +622,38 @@ data Journal = Journal {
|
||||
-- The data is partial, and list fields are in reverse order.
|
||||
type ParsedJournal = Journal
|
||||
|
||||
-- | One of the standard *-separated value file types known by hledger,
|
||||
data SepFormat
|
||||
= Csv -- comma-separated
|
||||
| Tsv -- tab-separated
|
||||
| Ssv -- semicolon-separated
|
||||
deriving Eq
|
||||
|
||||
-- | The id of a data format understood by hledger, eg @journal@ or @csv@.
|
||||
-- The --output-format option selects one of these for output.
|
||||
type StorageFormat = String
|
||||
data StorageFormat
|
||||
= Rules
|
||||
| Journal'
|
||||
| Ledger'
|
||||
| Timeclock
|
||||
| Timedot
|
||||
| Sep SepFormat
|
||||
deriving Eq
|
||||
|
||||
instance Show SepFormat where
|
||||
show Csv = "csv"
|
||||
show Ssv = "ssv"
|
||||
show Tsv = "tsv"
|
||||
|
||||
instance Show StorageFormat where
|
||||
show Rules = "rules"
|
||||
show Journal' = "journal"
|
||||
show Ledger' = "ledger"
|
||||
show Timeclock = "timeclock"
|
||||
show Timedot = "timedot"
|
||||
show (Sep Csv) = "csv"
|
||||
show (Sep Ssv) = "ssv"
|
||||
show (Sep Tsv) = "tsv"
|
||||
|
||||
-- | Extra information found in a payee directive.
|
||||
data PayeeDeclarationInfo = PayeeDeclarationInfo {
|
||||
|
||||
@ -190,7 +190,7 @@ data Reader m = Reader {
|
||||
,rParser :: MonadIO m => ErroringJournalParser m ParsedJournal
|
||||
}
|
||||
|
||||
instance Show (Reader m) where show r = rFormat r ++ " reader"
|
||||
instance Show (Reader m) where show r = show (rFormat r) ++ " reader"
|
||||
|
||||
-- | Parse an InputOpts from a RawOpts and a provided date.
|
||||
-- This will fail with a usage error if the forecast period expression cannot be parsed.
|
||||
|
||||
@ -41,11 +41,11 @@ import Hledger.Read.RulesReader (readJournalFromCsv)
|
||||
|
||||
--- ** reader
|
||||
|
||||
reader :: MonadIO m => Reader m
|
||||
reader = Reader
|
||||
{rFormat = "csv"
|
||||
,rExtensions = ["csv","tsv","ssv"]
|
||||
,rReadFn = parse
|
||||
reader :: MonadIO m => SepFormat -> Reader m
|
||||
reader sep = Reader
|
||||
{rFormat = Sep sep
|
||||
,rExtensions = [show sep]
|
||||
,rReadFn = parse sep
|
||||
,rParser = error' "sorry, CSV files can't be included yet" -- PARTIAL:
|
||||
}
|
||||
|
||||
@ -54,10 +54,10 @@ reader = Reader
|
||||
-- This file path is normally the CSV(/SSV/TSV) data file, and a corresponding rules file is inferred.
|
||||
-- But it can also be the rules file, in which case the corresponding data file is inferred.
|
||||
-- This does not check balance assertions.
|
||||
parse :: InputOpts -> FilePath -> Text -> ExceptT String IO Journal
|
||||
parse iopts f t = do
|
||||
parse :: SepFormat -> InputOpts -> FilePath -> Text -> ExceptT String IO Journal
|
||||
parse sep iopts f t = do
|
||||
let mrulesfile = mrules_file_ iopts
|
||||
readJournalFromCsv (Right <$> mrulesfile) f t
|
||||
readJournalFromCsv (Right <$> mrulesfile) f t (Just sep)
|
||||
-- apply any command line account aliases. Can fail with a bad replacement pattern.
|
||||
>>= liftEither . journalApplyAliases (aliasesFromOpts iopts)
|
||||
-- journalFinalise assumes the journal's items are
|
||||
|
||||
@ -139,21 +139,25 @@ readers' = [
|
||||
,TimeclockReader.reader
|
||||
,TimedotReader.reader
|
||||
,RulesReader.reader
|
||||
,CsvReader.reader
|
||||
,CsvReader.reader Csv
|
||||
,CsvReader.reader Tsv
|
||||
,CsvReader.reader Ssv
|
||||
-- ,LedgerReader.reader
|
||||
]
|
||||
|
||||
readerNames :: [String]
|
||||
readerNames = map rFormat (readers'::[Reader IO])
|
||||
readerNames = map (show . rFormat) (readers'::[Reader IO])
|
||||
|
||||
-- | @findReader mformat mpath@
|
||||
--
|
||||
-- Find the reader named by @mformat@, if provided.
|
||||
-- ("ssv" and "tsv" are recognised as alternate names for the csv reader,
|
||||
-- which also handles those formats.)
|
||||
-- Or, if a file path is provided, find the first reader that handles
|
||||
-- its file extension, if any.
|
||||
findReader :: MonadIO m => Maybe StorageFormat -> Maybe FilePath -> Maybe (Reader m)
|
||||
findReader Nothing Nothing = Nothing
|
||||
findReader (Just fmt) _ = headMay [r | r <- readers', rFormat r == fmt]
|
||||
findReader (Just fmt) _ = headMay [r | r <- readers', let rname = rFormat r, rname == fmt]
|
||||
findReader Nothing (Just path) =
|
||||
case prefix of
|
||||
Just fmt -> headMay [r | r <- readers', rFormat r == fmt]
|
||||
@ -168,16 +172,27 @@ type PrefixedFilePath = FilePath
|
||||
|
||||
-- | If a filepath is prefixed by one of the reader names and a colon,
|
||||
-- split that off. Eg "csv:-" -> (Just "csv", "-").
|
||||
splitReaderPrefix :: PrefixedFilePath -> (Maybe String, FilePath)
|
||||
-- These reader prefixes can be used to force a specific reader,
|
||||
-- overriding the file extension.
|
||||
splitReaderPrefix :: PrefixedFilePath -> (Maybe StorageFormat, FilePath)
|
||||
splitReaderPrefix f =
|
||||
headDef (Nothing, f) $
|
||||
[(Just r, drop (length r + 1) f) | r <- readerNames, (r++":") `isPrefixOf` f]
|
||||
let
|
||||
candidates = [(Just r, drop (length r + 1) f) | r <- readerNames ++ ["ssv","tsv"], (r++":") `isPrefixOf` f]
|
||||
(strPrefix, newF) = headDef (Nothing, f) candidates
|
||||
in case strPrefix of
|
||||
Just "csv" -> (Just (Sep Csv), newF)
|
||||
Just "tsv" -> (Just (Sep Tsv), newF)
|
||||
Just "ssv" -> (Just (Sep Ssv), newF)
|
||||
Just "journal" -> (Just Journal', newF)
|
||||
Just "timeclock" -> (Just Timeclock, newF)
|
||||
Just "timedot" -> (Just Timedot, newF)
|
||||
_ -> (Nothing, f)
|
||||
|
||||
--- ** reader
|
||||
|
||||
reader :: MonadIO m => Reader m
|
||||
reader = Reader
|
||||
{rFormat = "journal"
|
||||
{rFormat = Journal'
|
||||
,rExtensions = ["journal", "j", "hledger", "ledger"]
|
||||
,rReadFn = parse
|
||||
,rParser = journalp -- no need to add command line aliases like journalp'
|
||||
@ -282,7 +297,7 @@ includedirectivep = do
|
||||
paths <- getFilePaths parentoff parentpos glb
|
||||
let prefixedpaths = case mprefix of
|
||||
Nothing -> paths
|
||||
Just fmt -> map ((fmt++":")++) paths
|
||||
Just fmt -> map ((show fmt++":")++) paths
|
||||
forM_ prefixedpaths $ parseChild parentpos
|
||||
void newline
|
||||
|
||||
|
||||
@ -90,7 +90,7 @@ _READER__________________________________________ = undefined -- VSCode outline
|
||||
|
||||
reader :: MonadIO m => Reader m
|
||||
reader = Reader
|
||||
{rFormat = "rules"
|
||||
{rFormat = Rules
|
||||
,rExtensions = ["rules"]
|
||||
,rReadFn = parse
|
||||
,rParser = error' "sorry, rules files can't be included" -- PARTIAL:
|
||||
@ -135,7 +135,7 @@ parse iopts f _ = do
|
||||
then return nulljournal -- data file inferred from rules file name was not found
|
||||
else do
|
||||
t <- liftIO $ readFileOrStdinPortably dat
|
||||
readJournalFromCsv (Just $ Left rules) dat t
|
||||
readJournalFromCsv (Just $ Left rules) dat t Nothing
|
||||
-- apply any command line account aliases. Can fail with a bad replacement pattern.
|
||||
>>= liftEither . journalApplyAliases (aliasesFromOpts iopts)
|
||||
-- journalFinalise assumes the journal's items are
|
||||
@ -855,9 +855,9 @@ _CSV_READING__________________________________________ = undefined
|
||||
--
|
||||
-- 4. Return the transactions as a Journal.
|
||||
--
|
||||
readJournalFromCsv :: Maybe (Either CsvRules FilePath) -> FilePath -> Text -> ExceptT String IO Journal
|
||||
readJournalFromCsv Nothing "-" _ = throwError "please use --rules-file when reading CSV from stdin"
|
||||
readJournalFromCsv merulesfile csvfile csvtext = do
|
||||
readJournalFromCsv :: Maybe (Either CsvRules FilePath) -> FilePath -> Text -> Maybe SepFormat -> ExceptT String IO Journal
|
||||
readJournalFromCsv Nothing "-" _ _ = throwError "please use --rules-file when reading CSV from stdin"
|
||||
readJournalFromCsv merulesfile csvfile csvtext sep = do
|
||||
-- for now, correctness is the priority here, efficiency not so much
|
||||
|
||||
rules <- case merulesfile of
|
||||
@ -879,14 +879,19 @@ readJournalFromCsv merulesfile csvfile csvtext = do
|
||||
-- convert back to text and parse as csv records
|
||||
let
|
||||
csvtext1 = T.unlines csvlines2
|
||||
separator =
|
||||
case getDirective "separator" rules >>= parseSeparator of
|
||||
Just c -> c
|
||||
_ | ext == "ssv" -> ';'
|
||||
_ | ext == "tsv" -> '\t'
|
||||
_ -> ','
|
||||
where
|
||||
ext = map toLower $ drop 1 $ takeExtension csvfile
|
||||
-- The separator in the rules file takes precedence over the extension or prefix
|
||||
separator = case getDirective "separator" rules >>= parseSeparator of
|
||||
Just c -> c
|
||||
_ | ext == "ssv" -> ';'
|
||||
_ | ext == "tsv" -> '\t'
|
||||
_ ->
|
||||
case sep of
|
||||
Just Csv -> ','
|
||||
Just Ssv -> ';'
|
||||
Just Tsv -> '\t'
|
||||
Nothing -> ','
|
||||
where
|
||||
ext = map toLower $ drop 1 $ takeExtension csvfile
|
||||
-- parsec seemed to fail if you pass it "-" here -- TODO: try again with megaparsec
|
||||
parsecfilename = if csvfile == "-" then "(stdin)" else csvfile
|
||||
dbg6IO "using separator" separator
|
||||
|
||||
@ -77,7 +77,7 @@ import Data.Text as T (strip)
|
||||
|
||||
reader :: MonadIO m => Reader m
|
||||
reader = Reader
|
||||
{rFormat = "timeclock"
|
||||
{rFormat = Timeclock
|
||||
,rExtensions = ["timeclock"]
|
||||
,rReadFn = parse
|
||||
,rParser = timeclockfilep
|
||||
|
||||
@ -66,7 +66,7 @@ import Data.List (group)
|
||||
|
||||
reader :: MonadIO m => Reader m
|
||||
reader = Reader
|
||||
{rFormat = "timedot"
|
||||
{rFormat = Timedot
|
||||
,rExtensions = ["timedot"]
|
||||
,rReadFn = parse
|
||||
,rParser = timedotp
|
||||
|
||||
@ -625,7 +625,7 @@ expandPathPreservingPrefix d prefixedf = do
|
||||
let (p,f) = splitReaderPrefix prefixedf
|
||||
f' <- expandPath d f
|
||||
return $ case p of
|
||||
Just p' -> p' ++ ":" ++ f'
|
||||
Just p' -> (show p') ++ ":" ++ f'
|
||||
Nothing -> f'
|
||||
|
||||
-- | Get the expanded, absolute output file path specified by an
|
||||
|
||||
@ -1131,6 +1131,19 @@ $ ./csvtest.sh
|
||||
|
||||
>=
|
||||
|
||||
# ** 59. specify ssv prefix and no extension
|
||||
<
|
||||
12/11/2019;Foo;123;10.23
|
||||
RULES
|
||||
fields date, description, , amount
|
||||
date-format %d/%m/%Y
|
||||
$ ./ssvtest.sh
|
||||
2019-11-12 Foo
|
||||
expenses:unknown 10.23
|
||||
income:unknown -10.23
|
||||
|
||||
>=
|
||||
|
||||
# ** .
|
||||
#<
|
||||
#$ ./csvtest.sh
|
||||
|
||||
22
hledger/test/ssvtest.sh
Executable file
22
hledger/test/ssvtest.sh
Executable file
@ -0,0 +1,22 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# sh version, ported from bash so freebsd users can run these tests.
|
||||
# This scripts expects stdin formatted like this:
|
||||
# <multi-line ssv file (at least one line required, even if blank)>
|
||||
# RULES
|
||||
# <multi-line rules>
|
||||
#
|
||||
# Here, unlike in csvtest.sh, the ssv extension is intentionally NOT set
|
||||
# This allows us to verify that the prefix detection is working
|
||||
|
||||
cat > t.$$.input
|
||||
sed '1,/^RULES/d' t.$$.input > t.$$.rules
|
||||
sed '/^RULES/,$d' t.$$.input > t.$$
|
||||
|
||||
trap 'rm -f t.$$.input t.$$ t.$$.rules t.$$.stderr' EXIT
|
||||
|
||||
# Remove variable file name from error messages
|
||||
mkfifo t.$$.stderr
|
||||
sed -Ee "s/t\.$$/input/" t.$$.stderr >&2 &
|
||||
|
||||
hledger -f ssv:t.$$ print "$@" 2> t.$$.stderr
|
||||
Loading…
Reference in New Issue
Block a user