{-| A reader for CSV files. Uses optional extra rules to help interpret the data, like the convert command. -} module Hledger.Read.CsvReader ( CsvRules(..), nullrules, reader, tests_Hledger_Read_CsvReader ) where import Control.Monad import Control.Monad.Error import Test.HUnit -- import Text.ParserCombinators.Parsec hiding (parse) import Data.List import Data.Maybe import Data.Ord import Data.Time.Format (parseTime) import Safe import System.Directory (doesFileExist) import System.Exit (exitFailure) import System.FilePath (takeBaseName, replaceExtension) import System.IO (stderr) import System.Locale (defaultTimeLocale) import Test.HUnit import Text.CSV (parseCSV, parseCSVFromFile, CSV) import Text.ParserCombinators.Parsec import Text.Printf (hPrintf) import Hledger.Data import Hledger.Read.Utils import Prelude hiding (getContents) import Hledger.Utils.UTF8 (getContents) import Hledger.Utils import Hledger.Data.FormatStrings as FormatStrings import Hledger.Read.JournalReader (ledgeraccountname, someamount) -- import Hledger.Read.JournalReader (ledgerDirective, ledgerHistoricalPrice, -- ledgerDefaultYear, emptyLine, ledgerdatetime) reader :: Reader reader = Reader format detect parse_ format :: String format = "csv" -- | Does the given file path and data look like CSV ? detect :: FilePath -> String -> Bool detect f _ = fileSuffix f == format -- | Parse and post-process a "Journal" from CSV data, or give an error. -- XXX currently ignores the string and reads from the file path parse_ :: Maybe ParseRules -> FilePath -> String -> ErrorT String IO Journal parse_ rules f s = do r <- liftIO $ journalFromCsv rules f s case r of Left e -> throwError e Right j -> return j -- csvFile :: GenParser Char JournalContext (JournalUpdate,JournalContext) -- csvFile = do items <- many timelogItem -- eof -- ctx <- getState -- return (liftM (foldr (.) id) $ sequence items, ctx) nullrules = CsvRules { dateField=Nothing, dateFormat=Nothing, statusField=Nothing, codeField=Nothing, descriptionField=[], amountField=Nothing, amountInField=Nothing, amountOutField=Nothing, currencyField=Nothing, baseCurrency=Nothing, accountField=Nothing, account2Field=Nothing, effectiveDateField=Nothing, baseAccount="unknown", accountRules=[] } type CsvRecord = [String] -- | Read the CSV file named as an argument and print equivalent journal transactions, -- using/creating a .rules file. journalFromCsv :: Maybe CsvRules -> FilePath -> String -> IO (Either String Journal) journalFromCsv csvrules csvfile content = do let usingStdin = csvfile == "-" -- rulesFileSpecified = isJust $ rules_file_ opts -- when (usingStdin && (not rulesFileSpecified)) $ error' "please use --rules-file to specify a rules file when converting stdin" csvparse <- parseCsv csvfile content let records = case csvparse of Left e -> error' $ show e Right rs -> filter (/= [""]) rs rules <- case csvrules of Nothing -> do let rulesfile = rulesFileFor csvfile exists <- doesFileExist rulesfile if (not exists) then do hPrintf stderr "creating conversion rules file %s, edit this file for better results\n" rulesfile writeFile rulesfile initialRulesFileContent else hPrintf stderr "using conversion rules file %s\n" rulesfile liftM (either (error'.show) id) $ parseCsvRulesFile rulesfile Just r -> return r let invalid = validateRules rules -- when (debug_ opts) $ hPrintf stderr "rules: %s\n" (show rules) when (isJust invalid) $ error (fromJust invalid) let requiredfields = max 2 (maxFieldIndex rules + 1) badrecords = take 1 $ filter ((< requiredfields).length) records if null badrecords then do return $ Right nulljournal{jtxns=sortBy (comparing tdate) $ map (transactionFromCsvRecord rules) records} else return $ Left (unlines [ "Warning, at least one CSV record does not contain a field referenced by the" ,"conversion rules file, or has less than two fields. Are you converting a" ,"valid CSV file ? First bad record:" , show $ head badrecords ]) parseCsv :: FilePath -> String -> IO (Either ParseError CSV) parseCsv path content = case path of "-" -> liftM (parseCSV "(stdin)") getContents _ -> return $ parseCSV path content -- | The highest (0-based) field index referenced in the field -- definitions, or -1 if no fields are defined. maxFieldIndex :: CsvRules -> Int maxFieldIndex r = maximumDef (-1) $ catMaybes [ dateField r ,statusField r ,codeField r ,amountField r ,amountInField r ,amountOutField r ,currencyField r ,accountField r ,account2Field r ,effectiveDateField r ] -- rulesFileFor :: CliOpts -> FilePath -> FilePath -- rulesFileFor CliOpts{rules_file_=Just f} _ = f -- rulesFileFor CliOpts{rules_file_=Nothing} csvfile = replaceExtension csvfile ".rules" rulesFileFor :: FilePath -> FilePath rulesFileFor = flip replaceExtension ".rules" initialRulesFileContent :: String initialRulesFileContent = let prognameandversion = "hledger" in "# csv conversion rules file generated by " ++ prognameandversion ++ "\n" ++ "# Add rules to this file for more accurate conversion, see\n"++ "# http://hledger.org/MANUAL.html#convert\n" ++ "\n" ++ "base-account assets:bank:checking\n" ++ "date-field 0\n" ++ "description-field 4\n" ++ "amount-field 1\n" ++ "base-currency $\n" ++ "\n" ++ "# account-assigning rules\n" ++ "\n" ++ "SPECTRUM\n" ++ "expenses:health:gym\n" ++ "\n" ++ "ITUNES\n" ++ "BLKBSTR=BLOCKBUSTER\n" ++ "expenses:entertainment\n" ++ "\n" ++ "(TO|FROM) SAVINGS\n" ++ "assets:bank:savings\n" validateRules :: CsvRules -> Maybe String validateRules rules = let hasAmount = isJust $ amountField rules hasIn = isJust $ amountInField rules hasOut = isJust $ amountOutField rules in case (hasAmount, hasIn, hasOut) of (True, True, _) -> Just "Don't specify amount-in-field when specifying amount-field" (True, _, True) -> Just "Don't specify amount-out-field when specifying amount-field" (_, False, True) -> Just "Please specify amount-in-field when specifying amount-out-field" (_, True, False) -> Just "Please specify amount-out-field when specifying amount-in-field" (False, False, False) -> Just "Please specify either amount-field, or amount-in-field and amount-out-field" _ -> Nothing -- rules file parser parseCsvRulesFile :: FilePath -> IO (Either ParseError CsvRules) parseCsvRulesFile f = do s <- readFile f return $ parseCsvRules f s parseCsvRules :: FilePath -> String -> Either ParseError CsvRules parseCsvRules rulesfile s = runParser csvrulesfile nullrules{baseAccount=takeBaseName rulesfile} rulesfile s csvrulesfile :: GenParser Char CsvRules CsvRules csvrulesfile = do many blankorcommentline many definitions r <- getState ars <- many accountrule many blankorcommentline eof return r{accountRules=ars} definitions :: GenParser Char CsvRules () definitions = do choice' [ datefield ,dateformat ,statusfield ,codefield ,descriptionfield ,amountfield ,amountinfield ,amountoutfield ,currencyfield ,accountfield ,account2field ,effectivedatefield ,basecurrency ,baseaccount ,commentline ] "definition" return () datefield = do string "date-field" many1 spacenonewline v <- restofline updateState (\r -> r{dateField=readMay v}) effectivedatefield = do string "effective-date-field" many1 spacenonewline v <- restofline updateState (\r -> r{effectiveDateField=readMay v}) dateformat = do string "date-format" many1 spacenonewline v <- restofline updateState (\r -> r{dateFormat=Just v}) codefield = do string "code-field" many1 spacenonewline v <- restofline updateState (\r -> r{codeField=readMay v}) statusfield = do string "status-field" many1 spacenonewline v <- restofline updateState (\r -> r{statusField=readMay v}) descriptionFieldValue :: GenParser Char st [FormatString] descriptionFieldValue = do -- try (fieldNo <* spacenonewline) try fieldNo <|> formatStrings where fieldNo = many1 digit >>= \x -> return [FormatField False Nothing Nothing $ FieldNo $ read x] descriptionfield = do string "description-field" many1 spacenonewline formatS <- descriptionFieldValue restofline updateState (\x -> x{descriptionField=formatS}) amountfield = do string "amount-field" many1 spacenonewline v <- restofline x <- updateState (\r -> r{amountField=readMay v}) return x amountinfield = do choice [string "amount-in-field", string "in-field"] many1 spacenonewline v <- restofline updateState (\r -> r{amountInField=readMay v}) amountoutfield = do choice [string "amount-out-field", string "out-field"] many1 spacenonewline v <- restofline updateState (\r -> r{amountOutField=readMay v}) currencyfield = do string "currency-field" many1 spacenonewline v <- restofline updateState (\r -> r{currencyField=readMay v}) accountfield = do string "account-field" many1 spacenonewline v <- restofline updateState (\r -> r{accountField=readMay v}) account2field = do string "account2-field" many1 spacenonewline v <- restofline updateState (\r -> r{account2Field=readMay v}) basecurrency = do choice [string "base-currency", string "currency"] many1 spacenonewline v <- restofline updateState (\r -> r{baseCurrency=Just v}) baseaccount = do string "base-account" many1 spacenonewline v <- ledgeraccountname optional newline updateState (\r -> r{baseAccount=v}) accountrule :: GenParser Char CsvRules AccountRule accountrule = do many blankorcommentline pats <- many1 matchreplacepattern guard $ length pats >= 2 let pats' = init pats acct = either (fail.show) id $ runParser ledgeraccountname () "" $ fst $ last pats many blankorcommentline return (pats',acct) "account rule" blanklines = many1 blankline blankline = many spacenonewline >> newline >> return () "blank line" commentchar = oneOf ";#" commentline = many spacenonewline >> commentchar >> restofline >> return () "comment line" blankorcommentline = choice' [blankline, commentline] matchreplacepattern = do notFollowedBy commentchar matchpat <- many1 (noneOf "=\n") replpat <- optionMaybe $ do {char '='; many $ noneOf "\n"} newline return (matchpat,replpat) -- csv record conversion formatD :: CsvRecord -> Bool -> Maybe Int -> Maybe Int -> HledgerFormatField -> String formatD record leftJustified min max f = case f of FieldNo n -> maybe "" show $ atMay record n -- Some of these might in theory in read from fields AccountField -> "" DepthSpacerField -> "" TotalField -> "" DefaultDateField -> "" DescriptionField -> "" where show = formatValue leftJustified min max formatDescription :: CsvRecord -> [FormatString] -> String formatDescription _ [] = "" formatDescription record (f:fs) = s ++ (formatDescription record fs) where s = case f of FormatLiteral l -> l FormatField leftJustified min max field -> formatD record leftJustified min max field transactionFromCsvRecord :: CsvRules -> CsvRecord -> Transaction transactionFromCsvRecord rules fields = let date = parsedate $ normaliseDate (dateFormat rules) $ maybe "1900/1/1" (atDef "" fields) (dateField rules) effectivedate = do idx <- effectiveDateField rules return $ parsedate $ normaliseDate (dateFormat rules) $ (atDef "" fields) idx status = maybe False (null . strip . (atDef "" fields)) (statusField rules) code = maybe "" (atDef "" fields) (codeField rules) desc = formatDescription fields (descriptionField rules) comment = "" precomment = "" baseacc = maybe (baseAccount rules) (atDef "" fields) (accountField rules) amountstr = getAmount rules fields amountstr' = strnegate amountstr where strnegate ('-':s) = s strnegate s = '-':s currency = maybe (fromMaybe "" $ baseCurrency rules) (atDef "" fields) (currencyField rules) amountstr'' = currency ++ amountstr' amountparse = runParser someamount nullctx "" amountstr'' amount = either (const nullmixedamt) id amountparse -- Using costOfMixedAmount here to allow complex costs like "10 GBP @@ 15 USD". -- Aim is to have "10 GBP @@ 15 USD" applied to account "acct", but have "-15USD" applied to "baseacct" baseamount = costOfMixedAmount amount unknownacct | (readDef 0 amountstr' :: Double) < 0 = "income:unknown" | otherwise = "expenses:unknown" (acct',newdesc) = identify (accountRules rules) unknownacct desc acct = maybe acct' (atDef "" fields) (account2Field rules) t = Transaction { tdate=date, teffectivedate=effectivedate, tstatus=status, tcode=code, tdescription=newdesc, tcomment=comment, tpreceding_comment_lines=precomment, tmetadata=[], tpostings=[ Posting { pstatus=False, paccount=acct, pamount=amount, pcomment="", ptype=RegularPosting, pmetadata=[], ptransaction=Just t }, Posting { pstatus=False, paccount=baseacc, pamount=(-baseamount), pcomment="", ptype=RegularPosting, pmetadata=[], ptransaction=Just t } ] } in t -- | Convert some date string with unknown format to YYYY/MM/DD. normaliseDate :: Maybe String -- ^ User-supplied date format: this should be tried in preference to all others -> String -> String normaliseDate mb_user_format s = let parsewith = flip (parseTime defaultTimeLocale) s in maybe (error' $ "could not parse \""++s++"\" as a date, consider adding a date-format directive or upgrading") showDate $ firstJust $ (map parsewith $ maybe [] (:[]) mb_user_format -- the - modifier requires time-1.2.0.5, released -- in 2011/5, so for now we emulate it for wider -- compatibility. time < 1.2.0.5 also has a buggy -- %y which we don't do anything about. -- ++ [ -- "%Y/%m/%d" -- ,"%Y/%-m/%-d" -- ,"%Y-%m-%d" -- ,"%Y-%-m-%-d" -- ,"%m/%d/%Y" -- ,"%-m/%-d/%Y" -- ,"%m-%d-%Y" -- ,"%-m-%-d-%Y" -- ] ) ++ [ parseTime defaultTimeLocale "%Y/%m/%e" s ,parseTime defaultTimeLocale "%Y-%m-%e" s ,parseTime defaultTimeLocale "%m/%e/%Y" s ,parseTime defaultTimeLocale "%m-%e-%Y" s ,parseTime defaultTimeLocale "%Y/%m/%e" (take 5 s ++ "0" ++ drop 5 s) ,parseTime defaultTimeLocale "%Y-%m-%e" (take 5 s ++ "0" ++ drop 5 s) ,parseTime defaultTimeLocale "%m/%e/%Y" ('0':s) ,parseTime defaultTimeLocale "%m-%e-%Y" ('0':s) ] -- | Apply account matching rules to a transaction description to obtain -- the most appropriate account and a new description. identify :: [AccountRule] -> String -> String -> (String,String) identify rules defacct desc | null matchingrules = (defacct,desc) | otherwise = (acct,newdesc) where matchingrules = filter ismatch rules :: [AccountRule] where ismatch = any ((`regexMatchesCI` desc) . fst) . fst (prs,acct) = head matchingrules p_ms_r = filter (\(_,m,_) -> m) $ map (\(p,r) -> (p, p `regexMatchesCI` desc, r)) prs (p,_,r) = head p_ms_r newdesc = case r of Just repl -> regexReplaceCI p repl desc Nothing -> desc caseinsensitive = ("(?i)"++) getAmount :: CsvRules -> CsvRecord -> String getAmount rules fields = case amountField rules of Just f -> maybe "" (atDef "" fields) $ Just f Nothing -> case (i, o) of (x, "") -> x ("", x) -> "-"++x p -> error' $ "using amount-in-field and amount-out-field, found a value in both fields: "++show p where i = maybe "" (atDef "" fields) (amountInField rules) o = maybe "" (atDef "" fields) (amountOutField rules) tests_Hledger_Read_CsvReader = TestList (test_parser ++ test_description_parsing) test_description_parsing = [ "description-field 1" ~: assertParseDescription "description-field 1\n" [FormatField False Nothing Nothing (FieldNo 1)] , "description-field 1 " ~: assertParseDescription "description-field 1 \n" [FormatField False Nothing Nothing (FieldNo 1)] , "description-field %(1)" ~: assertParseDescription "description-field %(1)\n" [FormatField False Nothing Nothing (FieldNo 1)] , "description-field %(1)/$(2)" ~: assertParseDescription "description-field %(1)/%(2)\n" [ FormatField False Nothing Nothing (FieldNo 1) , FormatLiteral "/" , FormatField False Nothing Nothing (FieldNo 2) ] ] where assertParseDescription string expected = do assertParseEqual (parseDescription string) (nullrules {descriptionField = expected}) parseDescription :: String -> Either ParseError CsvRules parseDescription x = runParser descriptionfieldWrapper nullrules "(unknown)" x descriptionfieldWrapper :: GenParser Char CsvRules CsvRules descriptionfieldWrapper = do descriptionfield r <- getState return r test_parser = [ "convert rules parsing: empty file" ~: do -- let assertMixedAmountParse parseresult mixedamount = -- (either (const "parse error") showMixedAmountDebug parseresult) ~?= (showMixedAmountDebug mixedamount) assertParseEqual (parseCsvRules "unknown" "") nullrules ,"convert rules parsing: accountrule" ~: do assertParseEqual (parseWithCtx nullrules accountrule "A\na\n") -- leading blank line required ([("A",Nothing)], "a") ,"convert rules parsing: trailing comments" ~: do assertParse (parseWithCtx nullrules csvrulesfile "A\na\n# \n#\n") ,"convert rules parsing: trailing blank lines" ~: do assertParse (parseWithCtx nullrules csvrulesfile "A\na\n\n \n") -- not supported -- ,"convert rules parsing: no final newline" ~: do -- assertParse (parseWithCtx nullrules csvrulesfile "A\na") -- assertParse (parseWithCtx nullrules csvrulesfile "A\na\n# \n#") -- assertParse (parseWithCtx nullrules csvrulesfile "A\na\n\n ") -- (nullrules{ -- -- dateField=Maybe FieldPosition, -- -- statusField=Maybe FieldPosition, -- -- codeField=Maybe FieldPosition, -- -- descriptionField=Maybe FieldPosition, -- -- amountField=Maybe FieldPosition, -- -- currencyField=Maybe FieldPosition, -- -- baseCurrency=Maybe String, -- -- baseAccount=AccountName, -- accountRules=[ -- ([("A",Nothing)], "a") -- ] -- }) ]