From d4ecdb3fea12b8443510c2af5fc9c7d2b4037550 Mon Sep 17 00:00:00 2001 From: Michael Rees Date: Wed, 7 Feb 2024 16:53:20 -0600 Subject: [PATCH] imp: Support tsv and ssv prefixes (#2164) --- hledger-lib/Hledger/Data/Types.hs | 31 ++++++++++++++++++++- hledger-lib/Hledger/Read/Common.hs | 2 +- hledger-lib/Hledger/Read/CsvReader.hs | 16 +++++------ hledger-lib/Hledger/Read/JournalReader.hs | 31 +++++++++++++++------ hledger-lib/Hledger/Read/RulesReader.hs | 31 ++++++++++++--------- hledger-lib/Hledger/Read/TimeclockReader.hs | 2 +- hledger-lib/Hledger/Read/TimedotReader.hs | 2 +- hledger/Hledger/Cli/CliOptions.hs | 2 +- hledger/test/csv.test | 13 +++++++++ hledger/test/ssvtest.sh | 22 +++++++++++++++ 10 files changed, 118 insertions(+), 34 deletions(-) create mode 100755 hledger/test/ssvtest.sh diff --git a/hledger-lib/Hledger/Data/Types.hs b/hledger-lib/Hledger/Data/Types.hs index 80ba9b8c3..40ca26089 100644 --- a/hledger-lib/Hledger/Data/Types.hs +++ b/hledger-lib/Hledger/Data/Types.hs @@ -622,9 +622,38 @@ data Journal = Journal { -- The data is partial, and list fields are in reverse order. type ParsedJournal = Journal +-- | One of the standard *-separated value file types known by hledger, +data SepFormat + = Csv -- comma-separated + | Tsv -- tab-separated + | Ssv -- semicolon-separated + deriving Eq + -- | The id of a data format understood by hledger, eg @journal@ or @csv@. -- The --output-format option selects one of these for output. -type StorageFormat = String +data StorageFormat + = Rules + | Journal' + | Ledger' + | Timeclock + | Timedot + | Sep SepFormat + deriving Eq + +instance Show SepFormat where + show Csv = "csv" + show Ssv = "ssv" + show Tsv = "tsv" + +instance Show StorageFormat where + show Rules = "rules" + show Journal' = "journal" + show Ledger' = "ledger" + show Timeclock = "timeclock" + show Timedot = "timedot" + show (Sep Csv) = "csv" + show (Sep Ssv) = "ssv" + show (Sep Tsv) = "tsv" -- | Extra information found in a payee directive. 
data PayeeDeclarationInfo = PayeeDeclarationInfo { diff --git a/hledger-lib/Hledger/Read/Common.hs b/hledger-lib/Hledger/Read/Common.hs index 793477655..b23a9c3e5 100644 --- a/hledger-lib/Hledger/Read/Common.hs +++ b/hledger-lib/Hledger/Read/Common.hs @@ -190,7 +190,7 @@ data Reader m = Reader { ,rParser :: MonadIO m => ErroringJournalParser m ParsedJournal } -instance Show (Reader m) where show r = rFormat r ++ " reader" +instance Show (Reader m) where show r = show (rFormat r) ++ " reader" -- | Parse an InputOpts from a RawOpts and a provided date. -- This will fail with a usage error if the forecast period expression cannot be parsed. diff --git a/hledger-lib/Hledger/Read/CsvReader.hs b/hledger-lib/Hledger/Read/CsvReader.hs index 658b4d6a4..da2e3969d 100644 --- a/hledger-lib/Hledger/Read/CsvReader.hs +++ b/hledger-lib/Hledger/Read/CsvReader.hs @@ -41,11 +41,11 @@ import Hledger.Read.RulesReader (readJournalFromCsv) --- ** reader -reader :: MonadIO m => Reader m -reader = Reader - {rFormat = "csv" - ,rExtensions = ["csv","tsv","ssv"] - ,rReadFn = parse +reader :: MonadIO m => SepFormat -> Reader m +reader sep = Reader + {rFormat = Sep sep + ,rExtensions = [show sep] + ,rReadFn = parse sep ,rParser = error' "sorry, CSV files can't be included yet" -- PARTIAL: } @@ -54,10 +54,10 @@ reader = Reader -- This file path is normally the CSV(/SSV/TSV) data file, and a corresponding rules file is inferred. -- But it can also be the rules file, in which case the corresponding data file is inferred. -- This does not check balance assertions. -parse :: InputOpts -> FilePath -> Text -> ExceptT String IO Journal -parse iopts f t = do +parse :: SepFormat -> InputOpts -> FilePath -> Text -> ExceptT String IO Journal +parse sep iopts f t = do let mrulesfile = mrules_file_ iopts - readJournalFromCsv (Right <$> mrulesfile) f t + readJournalFromCsv (Right <$> mrulesfile) f t (Just sep) -- apply any command line account aliases. Can fail with a bad replacement pattern. 
>>= liftEither . journalApplyAliases (aliasesFromOpts iopts) -- journalFinalise assumes the journal's items are diff --git a/hledger-lib/Hledger/Read/JournalReader.hs b/hledger-lib/Hledger/Read/JournalReader.hs index 369b6457f..e8fa73d2a 100644 --- a/hledger-lib/Hledger/Read/JournalReader.hs +++ b/hledger-lib/Hledger/Read/JournalReader.hs @@ -139,21 +139,25 @@ readers' = [ ,TimeclockReader.reader ,TimedotReader.reader ,RulesReader.reader - ,CsvReader.reader + ,CsvReader.reader Csv + ,CsvReader.reader Tsv + ,CsvReader.reader Ssv -- ,LedgerReader.reader ] readerNames :: [String] -readerNames = map rFormat (readers'::[Reader IO]) +readerNames = map (show . rFormat) (readers'::[Reader IO]) -- | @findReader mformat mpath@ -- -- Find the reader named by @mformat@, if provided. +-- ("ssv" and "tsv" are recognised as alternate names for the csv reader, +-- which also handles those formats.) -- Or, if a file path is provided, find the first reader that handles -- its file extension, if any. findReader :: MonadIO m => Maybe StorageFormat -> Maybe FilePath -> Maybe (Reader m) findReader Nothing Nothing = Nothing -findReader (Just fmt) _ = headMay [r | r <- readers', rFormat r == fmt] +findReader (Just fmt) _ = headMay [r | r <- readers', let rname = rFormat r, rname == fmt] findReader Nothing (Just path) = case prefix of Just fmt -> headMay [r | r <- readers', rFormat r == fmt] @@ -168,16 +172,27 @@ type PrefixedFilePath = FilePath -- | If a filepath is prefixed by one of the reader names and a colon, -- split that off. Eg "csv:-" -> (Just "csv", "-"). -splitReaderPrefix :: PrefixedFilePath -> (Maybe String, FilePath) +-- These reader prefixes can be used to force a specific reader, +-- overriding the file extension. 
+splitReaderPrefix :: PrefixedFilePath -> (Maybe StorageFormat, FilePath) splitReaderPrefix f = - headDef (Nothing, f) $ - [(Just r, drop (length r + 1) f) | r <- readerNames, (r++":") `isPrefixOf` f] + -- Derive the recognised prefixes from the registered readers' formats + -- rather than from a hand-written list of names, so that every reader + -- (including "rules:") is handled and new readers are picked up + -- automatically. Each format's 'show' is its prefix name. + -- When no prefix matches, the path is returned unchanged. + headDef (Nothing, f) + [ (Just fmt, drop (length name + 1) f) + | fmt <- map rFormat (readers' :: [Reader IO]) + , let name = show fmt + , (name ++ ":") `isPrefixOf` f + ] --- ** reader reader :: MonadIO m => Reader m reader = Reader - {rFormat = "journal" + {rFormat = Journal' ,rExtensions = ["journal", "j", "hledger", "ledger"] ,rReadFn = parse ,rParser = journalp -- no need to add command line aliases like journalp' @@ -282,7 +297,7 @@ includedirectivep = do paths <- getFilePaths parentoff parentpos glb let prefixedpaths = case mprefix of Nothing -> paths - Just fmt -> map ((fmt++":")++) paths + Just fmt -> map ((show fmt++":")++) paths forM_ prefixedpaths $ parseChild parentpos void newline diff --git a/hledger-lib/Hledger/Read/RulesReader.hs b/hledger-lib/Hledger/Read/RulesReader.hs index dd8d81ef1..41c9aa063 100644 --- a/hledger-lib/Hledger/Read/RulesReader.hs +++ b/hledger-lib/Hledger/Read/RulesReader.hs @@ -90,7 +90,7 @@ _READER__________________________________________ = undefined -- VSCode outline reader :: MonadIO m => Reader m reader = Reader - {rFormat = "rules" + {rFormat = Rules ,rExtensions = ["rules"] ,rReadFn = parse ,rParser = error' "sorry, rules files can't be included" -- PARTIAL: @@ -135,7 +135,7 @@ parse iopts f _ = do then return nulljournal -- data file inferred from rules file name was not found else do t <- liftIO $ readFileOrStdinPortably dat - readJournalFromCsv (Just $ Left rules) dat t + readJournalFromCsv (Just $ Left rules) dat t Nothing -- apply any
command line account aliases. Can fail with a bad replacement pattern. >>= liftEither . journalApplyAliases (aliasesFromOpts iopts) -- journalFinalise assumes the journal's items are @@ -855,9 +855,9 @@ _CSV_READING__________________________________________ = undefined -- -- 4. Return the transactions as a Journal. -- -readJournalFromCsv :: Maybe (Either CsvRules FilePath) -> FilePath -> Text -> ExceptT String IO Journal -readJournalFromCsv Nothing "-" _ = throwError "please use --rules-file when reading CSV from stdin" -readJournalFromCsv merulesfile csvfile csvtext = do +readJournalFromCsv :: Maybe (Either CsvRules FilePath) -> FilePath -> Text -> Maybe SepFormat -> ExceptT String IO Journal +readJournalFromCsv Nothing "-" _ _ = throwError "please use --rules-file when reading CSV from stdin" +readJournalFromCsv merulesfile csvfile csvtext sep = do -- for now, correctness is the priority here, efficiency not so much rules <- case merulesfile of @@ -879,14 +879,19 @@ readJournalFromCsv merulesfile csvfile csvtext = do -- convert back to text and parse as csv records let csvtext1 = T.unlines csvlines2 - separator = - case getDirective "separator" rules >>= parseSeparator of - Just c -> c - _ | ext == "ssv" -> ';' - _ | ext == "tsv" -> '\t' - _ -> ',' - where - ext = map toLower $ drop 1 $ takeExtension csvfile + -- The separator in the rules file takes precedence over the extension or prefix + separator = case getDirective "separator" rules >>= parseSeparator of + Just c -> c + _ | ext == "ssv" -> ';' + _ | ext == "tsv" -> '\t' + _ -> + case sep of + Just Csv -> ',' + Just Ssv -> ';' + Just Tsv -> '\t' + Nothing -> ',' + where + ext = map toLower $ drop 1 $ takeExtension csvfile -- parsec seemed to fail if you pass it "-" here -- TODO: try again with megaparsec parsecfilename = if csvfile == "-" then "(stdin)" else csvfile dbg6IO "using separator" separator diff --git a/hledger-lib/Hledger/Read/TimeclockReader.hs b/hledger-lib/Hledger/Read/TimeclockReader.hs index 
b86479a7b..fdfd930e4 100644 --- a/hledger-lib/Hledger/Read/TimeclockReader.hs +++ b/hledger-lib/Hledger/Read/TimeclockReader.hs @@ -77,7 +77,7 @@ import Data.Text as T (strip) reader :: MonadIO m => Reader m reader = Reader - {rFormat = "timeclock" + {rFormat = Timeclock ,rExtensions = ["timeclock"] ,rReadFn = parse ,rParser = timeclockfilep diff --git a/hledger-lib/Hledger/Read/TimedotReader.hs b/hledger-lib/Hledger/Read/TimedotReader.hs index 14568bbf3..7c7bed673 100644 --- a/hledger-lib/Hledger/Read/TimedotReader.hs +++ b/hledger-lib/Hledger/Read/TimedotReader.hs @@ -66,7 +66,7 @@ import Data.List (group) reader :: MonadIO m => Reader m reader = Reader - {rFormat = "timedot" + {rFormat = Timedot ,rExtensions = ["timedot"] ,rReadFn = parse ,rParser = timedotp diff --git a/hledger/Hledger/Cli/CliOptions.hs b/hledger/Hledger/Cli/CliOptions.hs index 15d15ab5f..d733794d3 100644 --- a/hledger/Hledger/Cli/CliOptions.hs +++ b/hledger/Hledger/Cli/CliOptions.hs @@ -625,7 +625,7 @@ expandPathPreservingPrefix d prefixedf = do let (p,f) = splitReaderPrefix prefixedf f' <- expandPath d f return $ case p of - Just p' -> p' ++ ":" ++ f' + Just p' -> (show p') ++ ":" ++ f' Nothing -> f' -- | Get the expanded, absolute output file path specified by an diff --git a/hledger/test/csv.test b/hledger/test/csv.test index f4b1ffc69..905fcfe44 100644 --- a/hledger/test/csv.test +++ b/hledger/test/csv.test @@ -1131,6 +1131,19 @@ $ ./csvtest.sh >= +# ** 59. specify ssv prefix and no extension +< +12/11/2019;Foo;123;10.23 +RULES +fields date, description, , amount +date-format %d/%m/%Y +$ ./ssvtest.sh +2019-11-12 Foo + expenses:unknown 10.23 + income:unknown -10.23 + +>= + # ** . #< #$ ./csvtest.sh diff --git a/hledger/test/ssvtest.sh b/hledger/test/ssvtest.sh new file mode 100755 index 000000000..b02be2dd5 --- /dev/null +++ b/hledger/test/ssvtest.sh @@ -0,0 +1,22 @@ +#!/bin/sh +# +# sh version, ported from bash so freebsd users can run these tests. 
+# This script expects stdin formatted like this: +# <ssv data lines> +# RULES +# <rules file lines> +# +# Here, unlike in csvtest.sh, the ssv extension is intentionally NOT set. +# This allows us to verify that the prefix detection is working. + +cat > t.$$.input +sed '1,/^RULES/d' t.$$.input > t.$$.rules +sed '/^RULES/,$d' t.$$.input > t.$$ + +trap 'rm -f t.$$.input t.$$ t.$$.rules t.$$.stderr' EXIT + +# Remove variable file name from error messages +mkfifo t.$$.stderr +sed -Ee "s/t\.$$/input/" t.$$.stderr >&2 & + +hledger -f ssv:t.$$ print "$@" 2> t.$$.stderr