csv: merge lucamolteni's cassava/custom separators (squashed) (#829)

commit 5ba464de761b298e50d57a8b7d14bc28adb30d5d
Author: Luca Molteni <volothamp@gmail.com>
Date:   Fri Sep 7 17:54:12 2018 +0200

    Fix CI 2

commit f060ae9449f4b61a915b0ed4629fc1ba9b66fb4a
Author: Luca Molteni <volothamp@gmail.com>
Date:   Fri Sep 7 17:30:08 2018 +0200

    Fix CI build

commit af0719a33b9b72ad244ae80198d881a1f7145e9d
Author: Luca Molteni <volothamp@gmail.com>
Date:   Fri Sep 7 17:19:01 2018 +0200

    Fix rebase

commit 1a24ddfa54dfb4ff1326e1a51005ffa82d3dc3c8
Author: Luca Molteni <volothamp@gmail.com>
Date:   Fri Aug 10 16:25:24 2018 +0200

    Fixed some GHC warnings

commit 1ac43398a359b5925ef71f53347698f1c6c510ef
Author: Luca Molteni <volothamp@gmail.com>
Date:   Fri Aug 10 16:14:49 2018 +0200

    Fix .cabal

commit 422456b925d8aa4ab3e869f51e98c2b1c3dcde0a
Author: Luca Molteni <volothamp@gmail.com>
Date:   Sun Jul 1 22:56:20 2018 +0200

    Removed to-do list

commit 1118b762e4fd15c4fe7ba48ba86676706ea3a5a5
Author: Luca Molteni <volothamp@gmail.com>
Date:   Sun Jul 1 22:53:28 2018 +0200

    Better test

commit 1146ed0941655668bf7684f18aa15c5f4b9b20c2
Author: Luca Molteni <volothamp@gmail.com>
Date:   Sun Jul 1 15:32:28 2018 +0200

    Fix parsing

commit 4fc2374b2b81802990da30c96756aab54d77399c
Author: Luca Molteni <volothamp@gmail.com>
Date:   Thu Jun 21 22:11:11 2018 +0200

    Parsing of separator

commit f7a61737f1ad4460ba20ca9b2e86eb21468abb33
Author: Luca Molteni <volothamp@gmail.com>
Date:   Thu Jun 21 14:29:23 2018 +0200

    Almost separator in options

commit ac8841cf3b9c80914bc3271ad9b9ff4ae9ba48a7
Author: Luca Molteni <volothamp@gmail.com>
Date:   Thu Jun 21 14:16:59 2018 +0200

    Separator in parseCSV

commit 92a8b9f6ba77ea4237f769641e03029ac88542ea
Author: Luca Molteni <volothamp@gmail.com>
Date:   Thu Jun 21 13:30:41 2018 +0200

    separator option

commit ec417a81ae625647cf35e61776cdf02bdb2c6aea
Author: Luca Molteni <volothamp@gmail.com>
Date:   Thu Jun 21 10:45:26 2018 +0200

    Removed one qualified import

commit 8b2f386c2f780adcd34cff3de7edceacc1d325a7
Author: Luca Molteni <volothamp@gmail.com>
Date:   Wed Jun 20 14:01:12 2018 +0200

    Removed string conversions

commit a14d0e099e28a286bb81770cfc9cb8f5c7e5cf1f
Author: Luca Molteni <volothamp@gmail.com>
Date:   Wed Jun 20 10:23:20 2018 +0200

    custom delimiter in cassava

commit 694d48e2bc1ada0037b90367c017f3082f68ed45
Author: Luca Molteni <volothamp@gmail.com>
Date:   Sun Jun 10 17:51:54 2018 +0200

    Use Text.getContents - remove UTF-8 compatibility library

commit a7ada2cc60033ebdd796ca34cc2ec69a4f387843
Author: Luca Molteni <volothamp@gmail.com>
Date:   Sun Jun 10 17:49:34 2018 +0200

    todo list

commit 58ec47d3987909f6bace50e3e647e30dadd5bf03
Author: Luca Molteni <volothamp@gmail.com>
Date:   Sun Jun 10 17:45:22 2018 +0200

    CSV test now has unicode characters

commit b7851e94c3f1683b63ec7250a12bcde3b7bed691
Author: Luca Molteni <volothamp@gmail.com>
Date:   Sun Jun 10 16:59:39 2018 +0200

    Use decode from Text

commit 79f59fd28ccaca08fcd718fcd8d00b1c1d65d7e1
Author: Luca Molteni <volothamp@gmail.com>
Date:   Sun Jun 10 13:28:57 2018 +0200

    Use Text and Lazy Bytestring

commit 470c9bcb8dc00669beb4ef0303a1e7d9f7aecc89
Author: Luca Molteni <volothamp@gmail.com>
Date:   Sat Jun 9 15:30:22 2018 +0200

    Use megaparsec error

commit f978848ba249ef4f67b855bea5d4e549290c205c
Author: Luca Molteni <volothamp@gmail.com>
Date:   Sat Jun 9 15:22:07 2018 +0200

    Renamed qualify and remove Parsec

commit 152587fde204c43a55798d212e43f37cd3038c2e
Author: Luca Molteni <volothamp@gmail.com>
Date:   Sat Jun 9 15:12:36 2018 +0200

    Use cassava mega parsec

commit cf281577a3d3a071196484a6fc8485f2ea1f7d67
Author: Luca Molteni <volothamp@gmail.com>
Date:   Sat Jun 9 14:01:47 2018 +0200

    Removed Data.Vector

commit 1272e8e758369d8cc5778029a705b277355a5029
Author: Luca Molteni <volothamp@gmail.com>
Date:   Sat Jun 9 12:16:18 2018 +0200

    Removed Parsec ParseError

commit ae07f043135a19307fd65b281ade37a74c76acb2
Author: Luca Molteni <volothamp@gmail.com>
Date:   Sat Jun 9 12:06:14 2018 +0200

    Type sinonim for ParsecError

commit 8e15b253c11bd1c0c35a7641aeb18aa54e0ba9b0
Author: Luca Molteni <volothamp@gmail.com>
Date:   Sat Jun 9 11:16:08 2018 +0200

    Replaced with typeclasses

commit 1ed46f9c175603611325f3d377004e4b85f29377
Author: Luca Molteni <volothamp@gmail.com>
Date:   Sat Jun 9 11:01:33 2018 +0200

    Replaced Text/CSV with Cassava

commit 362f4111b5854145703174b976fc7acbd71b8783
Author: Luca Molteni <volothamp@gmail.com>
Date:   Sat Jun 9 10:34:37 2018 +0200

    Use cassava parsin instead of Text/CSV

commit 83e678e371618687cf7c15a4e2cfa67f570b6b64
Author: Luca Molteni <volothamp@gmail.com>
Date:   Sat Jun 9 08:22:51 2018 +0200

    Text CSV error messages

commit f922df71d274beeacab9fb2530b16c97f005cc08
Author: Luca Molteni <volothamp@gmail.com>
Date:   Fri Jun 8 21:45:20 2018 +0200

    Better types

commit edd130781c84790a53bff2283e6041eb8232e7cf
Author: Luca Molteni <volothamp@gmail.com>
Date:   Fri Jun 8 21:34:59 2018 +0200

    Conversion to Text CSV type

commit 0799383214483018ad2d977a3c8022414959c2b2
Author: Luca Molteni <volothamp@gmail.com>
Date:   Fri Jun 8 16:06:21 2018 +0200

    First function with cassava

commit e92aeb151ff527b383ff3d0ced7764e81b71af82
Author: Luca Molteni <volothamp@gmail.com>
Date:   Fri Jun 8 13:47:34 2018 +0200

    Added cassava as dependency

commit 5ea005c558a3939af7e5f0cd735a9b4da931228e
Author: Luca Molteni <volothamp@gmail.com>
Date:   Fri Jun 8 13:18:47 2018 +0200

    Better .gitignore for multi idea modules
This commit is contained in:
Simon Michael 2018-09-07 10:12:13 -07:00
parent 758c1fbc25
commit 23bdac41d9
15 changed files with 132 additions and 63 deletions

2
.gitignore vendored
View File

@ -6,7 +6,7 @@ _*
# dev stuff
.build
.idea
/*.iml
*.iml
.shake
.tmp
.vscode

View File

@ -17,7 +17,8 @@ module Hledger.Data.RawOptions (
maybestringopt,
listofstringopt,
intopt,
maybeintopt
maybeintopt,
maybecharopt
)
where
@ -50,6 +51,9 @@ maybestringopt name = maybe Nothing (Just . T.unpack . stripquotes . T.pack) . l
stringopt :: String -> RawOpts -> String
stringopt name = fromMaybe "" . maybestringopt name
maybecharopt :: String -> RawOpts -> Maybe Char
maybecharopt name rawopts = lookup name rawopts >>= headMay
listofstringopt :: String -> RawOpts -> [String]
listofstringopt name rawopts = [v | (k,v) <- rawopts, k==name]

View File

@ -158,6 +158,7 @@ data InputOpts = InputOpts {
mformat_ :: Maybe StorageFormat -- ^ a file/storage format to try, unless overridden
-- by a filename prefix. Nothing means try all.
,mrules_file_ :: Maybe FilePath -- ^ a conversion rules file to use (when reading CSV)
,separator_ :: Char -- ^ the separator to use (when reading CSV)
,aliases_ :: [String] -- ^ account name aliases to apply
,anon_ :: Bool -- ^ do light anonymisation/obfuscation of the data
,ignore_assertions_ :: Bool -- ^ don't check balance assertions
@ -170,13 +171,14 @@ data InputOpts = InputOpts {
instance Default InputOpts where def = definputopts
definputopts :: InputOpts
definputopts = InputOpts def def def def def def True def def
definputopts = InputOpts def def ',' def def def def True def def
rawOptsToInputOpts :: RawOpts -> InputOpts
rawOptsToInputOpts rawopts = InputOpts{
-- files_ = map (T.unpack . stripquotes . T.pack) $ listofstringopt "file" rawopts
mformat_ = Nothing
,mrules_file_ = maybestringopt "rules-file" rawopts
,separator_ = fromMaybe ',' (maybecharopt "separator" rawopts)
,aliases_ = map (T.unpack . stripquotes . T.pack) $ listofstringopt "alias" rawopts
,anon_ = boolopt "anon" rawopts
,ignore_assertions_ = boolopt "ignore-assertions" rawopts

View File

@ -18,23 +18,25 @@ module Hledger.Read.CsvReader (
reader,
-- * Misc.
CsvRecord,
CSV, Record, Field,
-- rules,
rulesFileFor,
parseRulesFile,
parseAndValidateCsvRules,
expandIncludes,
transactionFromCsvRecord,
printCSV,
-- * Tests
tests_CsvReader,
)
where
import Prelude ()
import "base-compat-batteries" Prelude.Compat hiding (getContents)
import "base-compat-batteries" Prelude.Compat
import Control.Exception hiding (try)
import Control.Monad
import Control.Monad.Except
import Control.Monad.State.Strict (StateT, get, modify', evalStateT)
import Data.Char (toLower, isDigit, isSpace)
import Data.Char (toLower, isDigit, isSpace, ord)
import "base-compat-batteries" Data.List.Compat
import Data.List.NonEmpty (fromList)
import Data.Maybe
@ -42,6 +44,7 @@ import Data.Ord
import qualified Data.Set as S
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.IO as T
import Data.Time.Calendar (Day)
#if MIN_VERSION_time(1,5,0)
@ -53,17 +56,28 @@ import System.Locale (defaultTimeLocale)
import Safe
import System.Directory (doesFileExist)
import System.FilePath
import Text.CSV (parseCSV, CSV)
import qualified Data.Csv as Cassava
import qualified Data.Csv.Parser.Megaparsec as CassavaMP
import qualified Data.ByteString as B
import Data.ByteString.Lazy (fromStrict)
import Data.Foldable
import Text.Megaparsec hiding (parse)
import Text.Megaparsec.Char
import qualified Text.Parsec as Parsec
import Text.Printf (printf)
import Data.Word
import Hledger.Data
import Hledger.Utils.UTF8IOCompat (getContents)
import Hledger.Utils
import Hledger.Read.Common (Reader(..),InputOpts(..),amountp, statusp, genericSourcePos)
type CSV = [Record]
type Record = [Field]
type Field = String
data CSVError = CSVError (ParseError Word8 CassavaMP.ConversionError)
deriving Show
reader :: Reader
reader = Reader
@ -78,7 +92,8 @@ reader = Reader
parse :: InputOpts -> FilePath -> Text -> ExceptT String IO Journal
parse iopts f t = do
let rulesfile = mrules_file_ iopts
r <- liftIO $ readJournalFromCsv rulesfile f t
let separator = separator_ iopts
r <- liftIO $ readJournalFromCsv separator rulesfile f t
case r of Left e -> throwError e
Right j -> return $ journalNumberAndTieTransactions j
-- XXX does not use parseAndFinaliseJournal like the other readers
@ -94,9 +109,9 @@ parse iopts f t = do
-- 4. if the rules file didn't exist, create it with the default rules and filename
-- 5. return the transactions as a Journal
-- @
readJournalFromCsv :: Maybe FilePath -> FilePath -> Text -> IO (Either String Journal)
readJournalFromCsv Nothing "-" _ = return $ Left "please use --rules-file when reading CSV from stdin"
readJournalFromCsv mrulesfile csvfile csvdata =
readJournalFromCsv :: Char -> Maybe FilePath -> FilePath -> Text -> IO (Either String Journal)
readJournalFromCsv _ Nothing "-" _ = return $ Left "please use --rules-file when reading CSV from stdin"
readJournalFromCsv separator mrulesfile csvfile csvdata =
handle (\e -> return $ Left $ show (e :: IOException)) $ do
let throwerr = throw.userError
@ -124,7 +139,7 @@ readJournalFromCsv mrulesfile csvfile csvdata =
records <- (either throwerr id .
dbg2 "validateCsv" . validateCsv skip .
dbg2 "parseCsv")
`fmap` parseCsv parsecfilename (T.unpack csvdata)
`fmap` parseCsv separator parsecfilename csvdata
dbg1IO "first 3 csv records" $ take 3 records
-- identify header lines
@ -166,14 +181,41 @@ readJournalFromCsv mrulesfile csvfile csvdata =
return $ Right nulljournal{jtxns=txns''}
parseCsv :: FilePath -> String -> IO (Either Parsec.ParseError CSV)
parseCsv path csvdata =
case path of
"-" -> liftM (parseCSV "(stdin)") getContents
_ -> return $ parseCSV path csvdata
parseCsv :: Char -> FilePath -> Text -> IO (Either CSVError CSV)
parseCsv separator filePath csvdata =
case filePath of
"-" -> liftM (parseCassava separator "(stdin)") T.getContents
_ -> return $ parseCassava separator filePath csvdata
parseCassava :: Char -> FilePath -> Text -> Either CSVError CSV
parseCassava separator path content =
case parseResult of
Left msg -> Left $ CSVError msg
Right a -> Right a
where parseResult = fmap parseResultToCsv $ CassavaMP.decodeWith (decodeOptions separator) Cassava.NoHeader path lazyContent
lazyContent = fromStrict $ T.encodeUtf8 content
decodeOptions :: Char -> Cassava.DecodeOptions
decodeOptions separator = Cassava.defaultDecodeOptions {
Cassava.decDelimiter = fromIntegral (ord separator)
}
parseResultToCsv :: (Foldable t, Functor t) => t (t B.ByteString) -> CSV
parseResultToCsv = toListList . unpackFields
where
toListList = toList . fmap toList
unpackFields = (fmap . fmap) (T.unpack . T.decodeUtf8)
printCSV :: CSV -> String
printCSV records = unlined (printRecord `map` records)
where printRecord = concat . intersperse "," . map printField
printField f = "\"" ++ concatMap escape f ++ "\""
escape '"' = "\"\""
escape x = [x]
unlined = concat . intersperse "\n"
-- | Return the cleaned up and validated CSV data (can be empty), or an error.
validateCsv :: Int -> Either Parsec.ParseError CSV -> Either String [CsvRecord]
validateCsv :: Int -> Either CSVError CSV -> Either String [CsvRecord]
validateCsv _ (Left e) = Left $ show e
validateCsv numhdrlines (Right rs) = validate $ drop numhdrlines $ filternulls rs
where

View File

@ -2,7 +2,7 @@
--
-- see: https://github.com/sol/hpack
--
-- hash: 642c6b4607959713188c82341f1050872ec6111a64f8e4b4cc1c1630da585baf
-- hash: 7d48cc897fb582a2600c3f3405a5463b853316f4a9fae370f0a74c46576a6198
name: hledger-lib
version: 1.10.99
@ -111,9 +111,10 @@ library
, blaze-markup >=0.5.1
, bytestring
, call-stack
, cassava
, cassava-megaparsec
, cmdargs >=0.10
, containers
, csv
, data-default >=0.5
, deepseq
, directory
@ -209,9 +210,10 @@ test-suite doctests
, blaze-markup >=0.5.1
, bytestring
, call-stack
, cassava
, cassava-megaparsec
, cmdargs >=0.10
, containers
, csv
, data-default >=0.5
, deepseq
, directory
@ -308,9 +310,10 @@ test-suite easytests
, blaze-markup >=0.5.1
, bytestring
, call-stack
, cassava
, cassava-megaparsec
, cmdargs >=0.10
, containers
, csv
, data-default >=0.5
, deepseq
, directory

View File

@ -48,7 +48,8 @@ dependencies:
- call-stack
- cmdargs >=0.10
- containers
- csv
- cassava
- cassava-megaparsec
- data-default >=0.5
- Decimal
- deepseq

View File

@ -120,6 +120,7 @@ inputflags :: [Flag RawOpts]
inputflags = [
flagReq ["file","f"] (\s opts -> Right $ setopt "file" s opts) "FILE" "use a different input file. For stdin, use - (default: $LEDGER_FILE or $HOME/.hledger.journal)"
,flagReq ["rules-file"] (\s opts -> Right $ setopt "rules-file" s opts) "RFILE" "CSV conversion rules file (default: FILE.rules)"
,flagReq ["separator"] (\s opts -> Right $ setopt "separator" s opts) "SEPARATOR" "CSV separator (default: ,)"
,flagReq ["alias"] (\s opts -> Right $ setopt "alias" s opts) "OLD=NEW" "rename accounts named OLD to NEW"
,flagNone ["anon"] (setboolopt "anon") "anonymize accounts and payees"
,flagReq ["pivot"] (\s opts -> Right $ setopt "pivot" s opts) "TAGNAME" "use some other field/tag for account names"

View File

@ -258,7 +258,7 @@ import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import System.Console.CmdArgs.Explicit as C
import Lucid as L
import Text.CSV
import Test.HUnit()
import Text.Printf (printf)
import Text.Tabular as T
--import Text.Tabular.AsciiWide
@ -266,6 +266,7 @@ import Text.Tabular as T
import Hledger
import Hledger.Cli.CliOptions
import Hledger.Cli.Utils
import Hledger.Read.CsvReader (CSV, printCSV)
-- | Command line options for this command.

View File

@ -17,7 +17,8 @@ where
import Data.Text (Text)
import qualified Data.Text as T
import System.Console.CmdArgs.Explicit
import Text.CSV
import Test.HUnit()
import Hledger.Read.CsvReader (CSV, printCSV)
import Hledger
import Hledger.Cli.CliOptions

View File

@ -20,13 +20,13 @@ import Data.Maybe
-- import Data.Text (Text)
import qualified Data.Text as T
import System.Console.CmdArgs.Explicit
import Text.CSV
import Hledger.Read.CsvReader (CSV, Record, printCSV)
import Test.HUnit()
import Hledger
import Hledger.Cli.CliOptions
import Hledger.Cli.Utils
registermode = (defCommandMode $ ["register"] ++ aliases) {
modeHelp = "show postings and running total. With --date2, show and sort by secondary date instead." `withAliases` aliases
,modeGroupFlags = Group {

View File

@ -18,7 +18,7 @@ import Data.Maybe (fromMaybe)
import qualified Data.Text as TS
import qualified Data.Text.Lazy as TL
import System.Console.CmdArgs.Explicit as C
import Text.CSV
import Hledger.Read.CsvReader (CSV, printCSV)
import Lucid as L
import Text.Tabular as T

View File

@ -2,7 +2,7 @@
--
-- see: https://github.com/sol/hpack
--
-- hash: 3be7e8745a826dbfc9d0007b9b37c3962486573614267365e6dafb8f7079ece6
-- hash: 670748bbdefdd5950fbc676e79a7c3924edbe21ac333141915b5509e799fa071
name: hledger
version: 1.10.99
@ -120,7 +120,6 @@ library
, bytestring
, cmdargs >=0.10
, containers
, csv
, data-default >=0.5
, directory
, easytest
@ -173,7 +172,6 @@ executable hledger
, bytestring
, cmdargs >=0.10
, containers
, csv
, data-default >=0.5
, directory
, easytest
@ -228,7 +226,6 @@ test-suite test
, bytestring
, cmdargs >=0.10
, containers
, csv
, data-default >=0.5
, directory
, easytest
@ -283,7 +280,6 @@ benchmark bench
, cmdargs >=0.10
, containers
, criterion
, csv
, data-default >=0.5
, directory
, easytest

View File

@ -85,7 +85,6 @@ dependencies:
- bytestring
- cmdargs >=0.10
- containers
- csv
- data-default >=0.5
- Decimal
- directory

View File

@ -9,7 +9,8 @@ packages:
- hledger-web
- hledger-api
#extra-deps:
extra-deps:
- cassava-megaparsec-1.0.0
nix:
pure: false

View File

@ -17,20 +17,21 @@
# 2. reading CSV with in-field and out-field
printf 'account1 Assets:MyAccount\ndate %%1\ndate-format %%d/%%Y/%%m\ndescription %%2\namount-in %%3\namount-out %%4\ncurrency $\n' >t.$$.csv.rules ; hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules
<<<
10/2009/09,Flubber Co,50,
11/2009/09,Flubber Co,,50
10/2009/09,Flubber Co🎅,50,
11/2009/09,Flubber Co🎅,,50
>>>
2009/09/10 Flubber Co
2009/09/10 Flubber Co🎅
Assets:MyAccount $50
income:unknown $-50
2009/09/11 Flubber Co
2009/09/11 Flubber Co🎅
Assets:MyAccount $-50
expenses:unknown $50
>>>2
>>>=0
# 3. handle conditions assigning multiple fields
printf 'fields date, description, amount\ndate-format %%d/%%Y/%%m\ncurrency $\naccount1 assets:myacct\nif Flubber\n account2 acct\n comment cmt' >t.$$.csv.rules; printf '10/2009/09,Flubber Co,50\n' | hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules
>>>
@ -92,3 +93,20 @@
>>>2
>>>=0
# 8. reading CSV with custom separator
printf 'account1 Assets:MyAccount\ndate %%1\ndate-format %%d/%%Y/%%m\ndescription %%2\namount-in %%3\namount-out %%4\ncurrency $\n' >t.$$.csv.rules ; hledger --separator ';' -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules
<<<
10/2009/09;Flubber Co🎅;50;
11/2009/09;Flubber Co🎅;;50
>>>
2009/09/10 Flubber Co🎅
Assets:MyAccount $50
income:unknown $-50
2009/09/11 Flubber Co🎅
Assets:MyAccount $-50
expenses:unknown $50
>>>2
>>>=0