diff --git a/.gitignore b/.gitignore index a07408d16..e0dfd5fbd 100644 --- a/.gitignore +++ b/.gitignore @@ -6,7 +6,7 @@ _* # dev stuff .build .idea -/*.iml +*.iml .shake .tmp .vscode diff --git a/hledger-lib/Hledger/Data/RawOptions.hs b/hledger-lib/Hledger/Data/RawOptions.hs index b2df79d49..412c595b4 100644 --- a/hledger-lib/Hledger/Data/RawOptions.hs +++ b/hledger-lib/Hledger/Data/RawOptions.hs @@ -17,7 +17,8 @@ module Hledger.Data.RawOptions ( maybestringopt, listofstringopt, intopt, - maybeintopt + maybeintopt, + maybecharopt ) where @@ -50,6 +51,9 @@ maybestringopt name = maybe Nothing (Just . T.unpack . stripquotes . T.pack) . l stringopt :: String -> RawOpts -> String stringopt name = fromMaybe "" . maybestringopt name +maybecharopt :: String -> RawOpts -> Maybe Char +maybecharopt name rawopts = lookup name rawopts >>= headMay + listofstringopt :: String -> RawOpts -> [String] listofstringopt name rawopts = [v | (k,v) <- rawopts, k==name] diff --git a/hledger-lib/Hledger/Read/Common.hs b/hledger-lib/Hledger/Read/Common.hs index a6b9dac3a..3dc07e4a1 100644 --- a/hledger-lib/Hledger/Read/Common.hs +++ b/hledger-lib/Hledger/Read/Common.hs @@ -158,6 +158,7 @@ data InputOpts = InputOpts { mformat_ :: Maybe StorageFormat -- ^ a file/storage format to try, unless overridden -- by a filename prefix. Nothing means try all. ,mrules_file_ :: Maybe FilePath -- ^ a conversion rules file to use (when reading CSV) + ,separator_ :: Char -- ^ the separator to use (when reading CSV) ,aliases_ :: [String] -- ^ account name aliases to apply ,anon_ :: Bool -- ^ do light anonymisation/obfuscation of the data ,ignore_assertions_ :: Bool -- ^ don't check balance assertions @@ -170,13 +171,14 @@ data InputOpts = InputOpts { instance Default InputOpts where def = definputopts definputopts :: InputOpts -definputopts = InputOpts def def def def def def True def def +definputopts = InputOpts def def ',' def def def def True def def rawOptsToInputOpts :: RawOpts -> InputOpts rawOptsToInputOpts rawopts = InputOpts{ -- files_ = map (T.unpack . stripquotes . T.pack) $ listofstringopt "file" rawopts mformat_ = Nothing ,mrules_file_ = maybestringopt "rules-file" rawopts + ,separator_ = fromMaybe ',' (maybecharopt "separator" rawopts) ,aliases_ = map (T.unpack . stripquotes . T.pack) $ listofstringopt "alias" rawopts ,anon_ = boolopt "anon" rawopts ,ignore_assertions_ = boolopt "ignore-assertions" rawopts diff --git a/hledger-lib/Hledger/Read/CsvReader.hs b/hledger-lib/Hledger/Read/CsvReader.hs index 4a1f1453b..34dae98a0 100644 --- a/hledger-lib/Hledger/Read/CsvReader.hs +++ b/hledger-lib/Hledger/Read/CsvReader.hs @@ -18,23 +18,25 @@ module Hledger.Read.CsvReader ( reader, -- * Misc. CsvRecord, + CSV, Record, Field, -- rules, rulesFileFor, parseRulesFile, parseAndValidateCsvRules, expandIncludes, transactionFromCsvRecord, + printCSV, -- * Tests tests_CsvReader, ) where import Prelude () -import "base-compat-batteries" Prelude.Compat hiding (getContents) +import "base-compat-batteries" Prelude.Compat import Control.Exception hiding (try) import Control.Monad import Control.Monad.Except import Control.Monad.State.Strict (StateT, get, modify', evalStateT) -import Data.Char (toLower, isDigit, isSpace) +import Data.Char (toLower, isDigit, isSpace, ord) import "base-compat-batteries" Data.List.Compat import Data.List.NonEmpty (fromList) import Data.Maybe @@ -42,6 +44,7 @@ import Data.Ord import qualified Data.Set as S import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Encoding as T import qualified Data.Text.IO as T import Data.Time.Calendar (Day) #if MIN_VERSION_time(1,5,0) @@ -53,17 +56,28 @@ import System.Locale (defaultTimeLocale) import Safe import System.Directory (doesFileExist) import System.FilePath -import Text.CSV (parseCSV, CSV) +import qualified Data.Csv as Cassava +import qualified Data.Csv.Parser.Megaparsec as CassavaMP +import qualified Data.ByteString as B +import Data.ByteString.Lazy (fromStrict) +import Data.Foldable import Text.Megaparsec hiding (parse) import Text.Megaparsec.Char -import qualified Text.Parsec as Parsec import Text.Printf (printf) +import Data.Word import Hledger.Data -import Hledger.Utils.UTF8IOCompat (getContents) import Hledger.Utils import Hledger.Read.Common (Reader(..),InputOpts(..),amountp, statusp, genericSourcePos) +type CSV = [Record] + +type Record = [Field] + +type Field = String + +data CSVError = CSVError (ParseError Word8 CassavaMP.ConversionError) + deriving Show reader :: Reader reader = Reader @@ -78,7 +92,8 @@ reader = Reader parse :: InputOpts -> FilePath -> Text -> ExceptT String IO Journal parse iopts f t = do let rulesfile = mrules_file_ iopts - r <- liftIO $ readJournalFromCsv rulesfile f t + let separator = separator_ iopts + r <- liftIO $ readJournalFromCsv separator rulesfile f t case r of Left e -> throwError e Right j -> return $ journalNumberAndTieTransactions j -- XXX does not use parseAndFinaliseJournal like the other readers @@ -92,11 +107,11 @@ parse iopts f t = do -- 2. parse the CSV data, or throw a parse error -- 3. convert the CSV records to transactions using the rules -- 4. if the rules file didn't exist, create it with the default rules and filename --- 5. return the transactions as a Journal +-- 5. return the transactions as a Journal -- @ -readJournalFromCsv :: Maybe FilePath -> FilePath -> Text -> IO (Either String Journal) -readJournalFromCsv Nothing "-" _ = return $ Left "please use --rules-file when reading CSV from stdin" -readJournalFromCsv mrulesfile csvfile csvdata = +readJournalFromCsv :: Char -> Maybe FilePath -> FilePath -> Text -> IO (Either String Journal) +readJournalFromCsv _ Nothing "-" _ = return $ Left "please use --rules-file when reading CSV from stdin" +readJournalFromCsv separator mrulesfile csvfile csvdata = handle (\e -> return $ Left $ show (e :: IOException)) $ do let throwerr = throw.userError @@ -109,7 +124,7 @@ readJournalFromCsv mrulesfile csvfile csvdata = dbg1IO "using conversion rules file" rulesfile liftIO $ (readFilePortably rulesfile >>= expandIncludes (takeDirectory rulesfile)) else return $ defaultRulesText rulesfile - rules <- liftIO (runExceptT $ parseAndValidateCsvRules rulesfile rulestext) >>= either throwerr return + rules <- liftIO (runExceptT $ parseAndValidateCsvRules rulesfile rulestext) >>= either throwerr return dbg2IO "rules" rules -- apply skip directive @@ -124,17 +139,17 @@ readJournalFromCsv mrulesfile csvfile csvdata = records <- (either throwerr id . dbg2 "validateCsv" . validateCsv skip . dbg2 "parseCsv") - `fmap` parseCsv parsecfilename (T.unpack csvdata) + `fmap` parseCsv separator parsecfilename csvdata dbg1IO "first 3 csv records" $ take 3 records -- identify header lines -- let (headerlines, datalines) = identifyHeaderLines records -- mfieldnames = lastMay headerlines - let + let -- convert CSV records to transactions txns = snd $ mapAccumL - (\pos r -> + (\pos r -> let SourcePos name line col = pos line' = (mkPos . (+1) . unPos) line @@ -146,16 +161,16 @@ readJournalFromCsv mrulesfile csvfile csvdata = -- Ensure transactions are ordered chronologically. -- First, reverse them to get same-date transactions ordered chronologically, - -- if the CSV records seem to be most-recent-first, ie if there's an explicit + -- if the CSV records seem to be most-recent-first, ie if there's an explicit -- "newest-first" directive, or if there's more than one date and the first date -- is more recent than the last. - txns' = + txns' = (if newestfirst || mseemsnewestfirst == Just True then reverse else id) txns where newestfirst = dbg3 "newestfirst" $ isJust $ getDirective "newest-first" rules - mseemsnewestfirst = dbg3 "mseemsnewestfirst" $ - case nub $ map tdate txns of - ds | length ds > 1 -> Just $ head ds > last ds + mseemsnewestfirst = dbg3 "mseemsnewestfirst" $ + case nub $ map tdate txns of + ds | length ds > 1 -> Just $ head ds > last ds _ -> Nothing -- Second, sort by date. txns'' = sortBy (comparing tdate) txns' @@ -166,14 +181,41 @@ readJournalFromCsv mrulesfile csvfile csvdata = return $ Right nulljournal{jtxns=txns''} -parseCsv :: FilePath -> String -> IO (Either Parsec.ParseError CSV) -parseCsv path csvdata = - case path of - "-" -> liftM (parseCSV "(stdin)") getContents - _ -> return $ parseCSV path csvdata +parseCsv :: Char -> FilePath -> Text -> IO (Either CSVError CSV) +parseCsv separator filePath csvdata = + case filePath of + "-" -> liftM (parseCassava separator "(stdin)") T.getContents + _ -> return $ parseCassava separator filePath csvdata + +parseCassava :: Char -> FilePath -> Text -> Either CSVError CSV +parseCassava separator path content = + case parseResult of + Left msg -> Left $ CSVError msg + Right a -> Right a + where parseResult = fmap parseResultToCsv $ CassavaMP.decodeWith (decodeOptions separator) Cassava.NoHeader path lazyContent + lazyContent = fromStrict $ T.encodeUtf8 content + +decodeOptions :: Char -> Cassava.DecodeOptions +decodeOptions separator = Cassava.defaultDecodeOptions { + Cassava.decDelimiter = fromIntegral (ord separator) + } + +parseResultToCsv :: (Foldable t, Functor t) => t (t B.ByteString) -> CSV +parseResultToCsv = toListList . unpackFields + where + toListList = toList . fmap toList + unpackFields = (fmap . fmap) (T.unpack . T.decodeUtf8) + +printCSV :: CSV -> String +printCSV records = unlined (printRecord `map` records) + where printRecord = concat . intersperse "," . map printField + printField f = "\"" ++ concatMap escape f ++ "\"" + escape '"' = "\"\"" + escape x = [x] + unlined = concat . intersperse "\n" -- | Return the cleaned up and validated CSV data (can be empty), or an error. -validateCsv :: Int -> Either Parsec.ParseError CSV -> Either String [CsvRecord] +validateCsv :: Int -> Either CSVError CSV -> Either String [CsvRecord] validateCsv _ (Left e) = Left $ show e validateCsv numhdrlines (Right rs) = validate $ drop numhdrlines $ filternulls rs where @@ -363,11 +405,11 @@ getDirective directivename = lookup directivename . rdirectives instance ShowErrorComponent String where showErrorComponent = id --- | An error-throwing action that parses this file's content --- as CSV conversion rules, interpolating any included files first, +-- | An error-throwing action that parses this file's content +-- as CSV conversion rules, interpolating any included files first, -- and runs some extra validation checks. parseRulesFile :: FilePath -> ExceptT String IO CsvRules -parseRulesFile f = +parseRulesFile f = liftIO (readFilePortably f >>= expandIncludes (takeDirectory f)) >>= parseAndValidateCsvRules f -- | Inline all files referenced by include directives in this hledger CSV rules text, recursively. @@ -381,9 +423,9 @@ expandIncludes dir content = mapM (expandLine dir) (T.lines content) >>= return where f' = dir dropWhile isSpace (T.unpack f) dir' = takeDirectory f' - _ -> return line + _ -> return line --- | An error-throwing action that parses this text as CSV conversion rules +-- | An error-throwing action that parses this text as CSV conversion rules -- and runs some extra validation checks. The file path is for error messages. parseAndValidateCsvRules :: FilePath -> T.Text -> ExceptT String IO CsvRules parseAndValidateCsvRules rulesfile s = do @@ -513,8 +555,8 @@ journalfieldnamep = do lift (dbgparse 2 "trying journalfieldnamep") T.unpack <$> choiceInState (map (lift . string . T.pack) journalfieldnames) --- Transaction fields and pseudo fields for CSV conversion. --- Names must precede any other name they contain, for the parser +-- Transaction fields and pseudo fields for CSV conversion. +-- Names must precede any other name they contain, for the parser -- (amount-in before amount; date2 before date). TODO: fix journalfieldnames = [ "account1" @@ -684,7 +726,7 @@ transactionFromCsvRecord sourcepos rules record = t account1 = T.pack $ maybe "" render (mfieldtemplate "account1") `or` defaccount1 account2 = T.pack $ maybe "" render (mfieldtemplate "account2") `or` defaccount2 balance = maybe Nothing (parsebalance.render) $ mfieldtemplate "balance" - parsebalance str + parsebalance str | all isSpace str = Nothing | otherwise = Just $ (either (balanceerror str) id $ runParser (evalStateT (amountp <* eof) mempty) "" $ T.pack $ (currency++) $ simplifySign str, nullsourcepos) balanceerror str err = error' $ unlines @@ -738,7 +780,7 @@ getAmountStr rules record = type CsvAmountString = String -- | Canonicalise the sign in a CSV amount string. --- Such strings can have a minus sign, negating parentheses, +-- Such strings can have a minus sign, negating parentheses, -- or any two of these (which cancels out). -- -- >>> simplifySign "1" @@ -840,15 +882,15 @@ tests_CsvReader = tests "CsvReader" [ ,tests "rulesp" [ test "trailing comments" $ parseWithState' rules rulesp "skip\n# \n#\n" `is` Right rules{rdirectives = [("skip","")]} - + ,test "trailing blank lines" $ parseWithState' rules rulesp "skip\n\n \n" `is` (Right rules{rdirectives = [("skip","")]}) - + ,test "no final newline" $ parseWithState' rules rulesp "skip" `is` (Right rules{rdirectives=[("skip","")]}) ,test "assignment with empty value" $ - parseWithState' rules rulesp "account1 \nif foo\n account2 foo\n" `is` + parseWithState' rules rulesp "account1 \nif foo\n account2 foo\n" `is` (Right rules{rassignments = [("account1","")], rconditionalblocks = [([["foo"]],[("account2","foo")])]}) ] diff --git a/hledger-lib/hledger-lib.cabal b/hledger-lib/hledger-lib.cabal index a3f83e4ed..61e8c09c1 100644 --- a/hledger-lib/hledger-lib.cabal +++ b/hledger-lib/hledger-lib.cabal @@ -2,7 +2,7 @@ -- -- see: https://github.com/sol/hpack -- --- hash: 642c6b4607959713188c82341f1050872ec6111a64f8e4b4cc1c1630da585baf +-- hash: 7d48cc897fb582a2600c3f3405a5463b853316f4a9fae370f0a74c46576a6198 name: hledger-lib version: 1.10.99 @@ -111,9 +111,10 @@ library , blaze-markup >=0.5.1 , bytestring , call-stack + , cassava + , cassava-megaparsec , cmdargs >=0.10 , containers - , csv , data-default >=0.5 , deepseq , directory @@ -209,9 +210,10 @@ test-suite doctests , blaze-markup >=0.5.1 , bytestring , call-stack + , cassava + , cassava-megaparsec , cmdargs >=0.10 , containers - , csv , data-default >=0.5 , deepseq , directory @@ -308,9 +310,10 @@ test-suite easytests , blaze-markup >=0.5.1 , bytestring , call-stack + , cassava + , cassava-megaparsec , cmdargs >=0.10 , containers - , csv , data-default >=0.5 , deepseq , directory diff --git a/hledger-lib/package.yaml b/hledger-lib/package.yaml index 191785aa7..27f24573b 100644 --- a/hledger-lib/package.yaml +++ b/hledger-lib/package.yaml @@ -48,7 +48,8 @@ dependencies: - call-stack - cmdargs >=0.10 - containers -- csv +- cassava +- cassava-megaparsec - data-default >=0.5 - Decimal - deepseq diff --git a/hledger/Hledger/Cli/CliOptions.hs b/hledger/Hledger/Cli/CliOptions.hs index 28f812e86..1a5e7b165 100644 --- a/hledger/Hledger/Cli/CliOptions.hs +++ b/hledger/Hledger/Cli/CliOptions.hs @@ -120,6 +120,7 @@ inputflags :: [Flag RawOpts] inputflags = [ flagReq ["file","f"] (\s opts -> Right $ setopt "file" s opts) "FILE" "use a different input file. For stdin, use - (default: $LEDGER_FILE or $HOME/.hledger.journal)" ,flagReq ["rules-file"] (\s opts -> Right $ setopt "rules-file" s opts) "RFILE" "CSV conversion rules file (default: FILE.rules)" + ,flagReq ["separator"] (\s opts -> Right $ setopt "separator" s opts) "SEPARATOR" "CSV separator (default: ,)" ,flagReq ["alias"] (\s opts -> Right $ setopt "alias" s opts) "OLD=NEW" "rename accounts named OLD to NEW" ,flagNone ["anon"] (setboolopt "anon") "anonymize accounts and payees" ,flagReq ["pivot"] (\s opts -> Right $ setopt "pivot" s opts) "TAGNAME" "use some other field/tag for account names" diff --git a/hledger/Hledger/Cli/Commands/Balance.hs b/hledger/Hledger/Cli/Commands/Balance.hs index 40ec3c42d..ecd3320a1 100644 --- a/hledger/Hledger/Cli/Commands/Balance.hs +++ b/hledger/Hledger/Cli/Commands/Balance.hs @@ -258,14 +258,15 @@ import qualified Data.Text as T import qualified Data.Text.Lazy as TL import System.Console.CmdArgs.Explicit as C import Lucid as L -import Text.CSV +import Test.HUnit() import Text.Printf (printf) import Text.Tabular as T --import Text.Tabular.AsciiWide -import Hledger +import Hledger import Hledger.Cli.CliOptions import Hledger.Cli.Utils +import Hledger.Read.CsvReader (CSV, printCSV) -- | Command line options for this command. diff --git a/hledger/Hledger/Cli/Commands/Print.hs b/hledger/Hledger/Cli/Commands/Print.hs index 58a17ed14..83f40483f 100644 --- a/hledger/Hledger/Cli/Commands/Print.hs +++ b/hledger/Hledger/Cli/Commands/Print.hs @@ -17,7 +17,8 @@ where import Data.Text (Text) import qualified Data.Text as T import System.Console.CmdArgs.Explicit -import Text.CSV +import Test.HUnit() +import Hledger.Read.CsvReader (CSV, printCSV) import Hledger import Hledger.Cli.CliOptions diff --git a/hledger/Hledger/Cli/Commands/Register.hs b/hledger/Hledger/Cli/Commands/Register.hs index a414e62b9..309b0c6b7 100644 --- a/hledger/Hledger/Cli/Commands/Register.hs +++ b/hledger/Hledger/Cli/Commands/Register.hs @@ -20,13 +20,13 @@ import Data.Maybe -- import Data.Text (Text) import qualified Data.Text as T import System.Console.CmdArgs.Explicit -import Text.CSV +import Hledger.Read.CsvReader (CSV, Record, printCSV) +import Test.HUnit() -import Hledger +import Hledger import Hledger.Cli.CliOptions import Hledger.Cli.Utils - registermode = (defCommandMode $ ["register"] ++ aliases) { modeHelp = "show postings and running total. With --date2, show and sort by secondary date instead." `withAliases` aliases ,modeGroupFlags = Group { diff --git a/hledger/Hledger/Cli/CompoundBalanceCommand.hs b/hledger/Hledger/Cli/CompoundBalanceCommand.hs index aea87afed..3130884f9 100644 --- a/hledger/Hledger/Cli/CompoundBalanceCommand.hs +++ b/hledger/Hledger/Cli/CompoundBalanceCommand.hs @@ -18,7 +18,7 @@ import Data.Maybe (fromMaybe) import qualified Data.Text as TS import qualified Data.Text.Lazy as TL import System.Console.CmdArgs.Explicit as C -import Text.CSV +import Hledger.Read.CsvReader (CSV, printCSV) import Lucid as L import Text.Tabular as T diff --git a/hledger/hledger.cabal b/hledger/hledger.cabal index e6f887671..344d38030 100644 --- a/hledger/hledger.cabal +++ b/hledger/hledger.cabal @@ -2,7 +2,7 @@ -- -- see: https://github.com/sol/hpack -- --- hash: 3be7e8745a826dbfc9d0007b9b37c3962486573614267365e6dafb8f7079ece6 +-- hash: 670748bbdefdd5950fbc676e79a7c3924edbe21ac333141915b5509e799fa071 name: hledger version: 1.10.99 @@ -120,7 +120,6 @@ library , bytestring , cmdargs >=0.10 , containers - , csv , data-default >=0.5 , directory , easytest @@ -173,7 +172,6 @@ executable hledger , bytestring , cmdargs >=0.10 , containers - , csv , data-default >=0.5 , directory , easytest @@ -228,7 +226,6 @@ test-suite test , bytestring , cmdargs >=0.10 , containers - , csv , data-default >=0.5 , directory , easytest @@ -283,7 +280,6 @@ benchmark bench , cmdargs >=0.10 , containers , criterion - , csv , data-default >=0.5 , directory , easytest diff --git a/hledger/package.yaml b/hledger/package.yaml index 5f3f99d57..6061af095 100644 --- a/hledger/package.yaml +++ b/hledger/package.yaml @@ -85,7 +85,6 @@ dependencies: - bytestring - cmdargs >=0.10 - containers -- csv - data-default >=0.5 - Decimal - directory diff --git a/stack.yaml b/stack.yaml index c5653a11a..b5814d9c9 100644 --- a/stack.yaml +++ b/stack.yaml @@ -9,7 +9,8 @@ packages: - hledger-web - hledger-api -#extra-deps: +extra-deps: +- cassava-megaparsec-1.0.0 nix: pure: false diff --git a/tests/csv/csv-read.test b/tests/csv/csv-read.test index 30303ad88..8fd8bdba5 100644 --- a/tests/csv/csv-read.test +++ b/tests/csv/csv-read.test @@ -17,20 +17,21 @@ # 2. reading CSV with in-field and out-field printf 'account1 Assets:MyAccount\ndate %%1\ndate-format %%d/%%Y/%%m\ndescription %%2\namount-in %%3\namount-out %%4\ncurrency $\n' >t.$$.csv.rules ; hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules <<< -10/2009/09,Flubber Co,50, -11/2009/09,Flubber Co,,50 +10/2009/09,Flubber Co🎅,50, +11/2009/09,Flubber Co🎅,,50 >>> -2009/09/10 Flubber Co +2009/09/10 Flubber Co🎅 Assets:MyAccount $50 income:unknown $-50 -2009/09/11 Flubber Co +2009/09/11 Flubber Co🎅 Assets:MyAccount $-50 expenses:unknown $50 >>>2 >>>=0 + # 3. handle conditions assigning multiple fields printf 'fields date, description, amount\ndate-format %%d/%%Y/%%m\ncurrency $\naccount1 assets:myacct\nif Flubber\n account2 acct\n comment cmt' >t.$$.csv.rules; printf '10/2009/09,Flubber Co,50\n' | hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules >>> @@ -92,3 +93,20 @@ >>>2 >>>=0 + +# 8. reading CSV with custom separator + printf 'account1 Assets:MyAccount\ndate %%1\ndate-format %%d/%%Y/%%m\ndescription %%2\namount-in %%3\namount-out %%4\ncurrency $\n' >t.$$.csv.rules ; hledger --separator ';' -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules +<<< +10/2009/09;Flubber Co🎅;50; +11/2009/09;Flubber Co🎅;;50 +>>> +2009/09/10 Flubber Co🎅 + Assets:MyAccount $50 + income:unknown $-50 + +2009/09/11 Flubber Co🎅 + Assets:MyAccount $-50 + expenses:unknown $50 + +>>>2 +>>>=0 \ No newline at end of file