diff --git a/hledger-lib/Hledger/Data/Types.hs b/hledger-lib/Hledger/Data/Types.hs index 170985113..c8ed06ebb 100644 --- a/hledger-lib/Hledger/Data/Types.hs +++ b/hledger-lib/Hledger/Data/Types.hs @@ -191,6 +191,8 @@ data Reader = Reader { ,rParser :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal } +instance Show Reader where show r = "Reader for "++rFormat r + -- data format parse/conversion rules -- currently the only parse (conversion) rules are those for the CSV format diff --git a/hledger-lib/Hledger/Read.hs b/hledger-lib/Hledger/Read.hs index 063702b0d..6e3559fe6 100644 --- a/hledger-lib/Hledger/Read.hs +++ b/hledger-lib/Hledger/Read.hs @@ -84,14 +84,6 @@ defaultJournalPath = do defaultJournal :: IO Journal defaultJournal = defaultJournalPath >>= readJournalFile Nothing Nothing >>= either error' return --- | Find the reader which can handle the given format, if any. --- Typically there is just one; only the first is returned. -readerForFormat :: Format -> Maybe Reader -readerForFormat s | null rs = Nothing - | otherwise = Just $ head rs - where - rs = filter ((s==).rFormat) readers :: [Reader] - -- | Read a journal from the given string, trying all known formats, or simply throw an error. readJournal' :: String -> IO Journal readJournal' s = readJournal Nothing Nothing Nothing s >>= either error' return @@ -103,52 +95,56 @@ tests_readJournal' = [ ] --- | Read a Journal from this string or give an error message, using the --- specified data format or trying all known formats. A CSV conversion --- rules file may be specified for better conversion of that format, --- and/or a file path for better error messages. + +-- | Read a journal from this string, trying whatever readers seem appropriate: +-- +-- - if a format is specified, try that reader only +-- +-- - or if one or more readers recognises the file path and data, try those +-- +-- - otherwise, try them all. +-- +-- A CSV conversion rules file may also be specified for use by the CSV reader. readJournal :: Maybe Format -> Maybe FilePath -> Maybe FilePath -> String -> IO (Either String Journal) readJournal format rulesfile path s = - let readerstotry = case format of Nothing -> readers - Just f -> case readerForFormat f of Just r -> [r] - Nothing -> [] - in firstSuccessOrBestError $ map tryReader readerstotry + -- trace (show (format, rulesfile, path)) $ + tryReaders $ readersFor (format, path, s) where - path' = fromMaybe "(string)" path - tryReader :: Reader -> IO (Either String Journal) - tryReader r = do -- printf "trying %s reader\n" (rFormat r) - (runErrorT . (rParser r) rulesfile path') s - - -- if no reader succeeds, we return the error of the first; - -- ideally it would be the error of the most likely intended - -- reader, based on file suffix and/or data sniffing. - firstSuccessOrBestError :: [IO (Either String Journal)] -> IO (Either String Journal) - firstSuccessOrBestError [] = return $ Left "no readers found" - firstSuccessOrBestError attempts = firstSuccessOrBestError' attempts + -- try each reader in turn, returning the error of the first if all fail + tryReaders :: [Reader] -> IO (Either String Journal) + tryReaders = firstSuccessOrBestError [] where - firstSuccessOrBestError' [] = head attempts - firstSuccessOrBestError' (a:as) = do - r <- a - case r of Right j -> return $ Right j - Left _ -> firstSuccessOrBestError' as + firstSuccessOrBestError :: [String] -> [Reader] -> IO (Either String Journal) + firstSuccessOrBestError [] [] = return $ Left "no readers found" + firstSuccessOrBestError errs (r:rs) = do + -- printf "trying %s reader\n" (rFormat r) + result <- (runErrorT . (rParser r) rulesfile path') s + case result of Right j -> return $ Right j -- success! + Left e -> firstSuccessOrBestError (errs++[e]) rs -- keep trying + firstSuccessOrBestError (e:_) [] = return $ Left e -- none left, return first error + path' = fromMaybe "(string)" path - -- -- unknown format - -- bestErrorMsg :: [String] -> String -> Maybe FilePath -> String - -- bestErrorMsg [] _ path = printf "could not parse %sdata%s" fmts pathmsg - -- where fmts = case formats of - -- [] -> "" - -- [f] -> f ++ " " - -- fs -> intercalate ", " (init fs) ++ " or " ++ last fs ++ " " - -- pathmsg = case path of - -- Nothing -> "" - -- Just p -> " in "++p - -- -- one or more errors - report (the most appropriate ?) one - -- bestErrorMsg es s path = printf "could not parse %s data%s\n%s" (rFormat r) pathmsg e - -- where (r,e) = headDef (head readers, head es) $ filter detects $ zip readers es - -- detects (r,_) = (rDetector r) path' s - -- pathmsg = case path of - -- Nothing -> "" - -- Just p -> " in "++p +-- | Which readers are worth trying for this (possibly unspecified) format, filepath, and data ? +readersFor :: (Maybe Format, Maybe FilePath, String) -> [Reader] +readersFor (format,path,s) = + case format of + Just f -> case readerForFormat f of Just r -> [r] + Nothing -> [] + Nothing -> case path of Nothing -> readers + Just "-" -> readers + Just p -> case readersForPathAndData (p,s) of [] -> readers + rs -> rs + +-- | Find the (first) reader which can handle the given format, if any. +readerForFormat :: Format -> Maybe Reader +readerForFormat s | null rs = Nothing + | otherwise = Just $ head rs + where + rs = filter ((s==).rFormat) readers :: [Reader] + +-- | Find the readers which think they can handle the given file path and data, if any. +readersForPathAndData :: (FilePath,String) -> [Reader] +readersForPathAndData (f,s) = filter (\r -> (rDetector r) f s) readers -- | Read a Journal from this file (or stdin if the filename is -) or give -- an error message, using the specified data format or trying all known diff --git a/hledger-lib/Hledger/Read/CsvReader.hs b/hledger-lib/Hledger/Read/CsvReader.hs index 7fb2b7a10..dbbb0e4c6 100644 --- a/hledger-lib/Hledger/Read/CsvReader.hs +++ b/hledger-lib/Hledger/Read/CsvReader.hs @@ -30,6 +30,7 @@ module Hledger.Read.CsvReader ( tests_Hledger_Read_CsvReader ) where +import Control.Exception hiding (try) import Control.Monad import Control.Monad.Error -- import Test.HUnit @@ -44,7 +45,7 @@ import System.IO (stderr) import System.Locale (defaultTimeLocale) import Test.HUnit import Text.CSV (parseCSV, CSV) -import Text.ParserCombinators.Parsec hiding (parse) +import Text.ParserCombinators.Parsec hiding (parse) import Text.ParserCombinators.Parsec.Error import Text.ParserCombinators.Parsec.Pos import Text.Printf (hPrintf) @@ -65,12 +66,13 @@ format = "csv" -- | Does the given file path and data look like CSV ? detect :: FilePath -> String -> Bool -detect f _ = takeExtension f == format +detect f _ = takeExtension f == '.':format -- | Parse and post-process a "Journal" from CSV data, or give an error. -- XXX currently ignores the string and reads from the file path parse :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal -parse rulesfile f s = do +parse rulesfile f s = -- trace ("running "++format++" reader") $ + do r <- liftIO $ readJournalFromCsv rulesfile f s case r of Left e -> throwError e Right j -> return j @@ -96,44 +98,43 @@ nullrules = CsvRules { type CsvRecord = [String] --- | Read a Journal or an error message from the given CSV data (and --- filename, used for error messages.) To do this we read a CSV --- conversion rules file, or auto-create a default one if it does not --- exist. The rules filename may be specified, otherwise it will be --- derived from the CSV filename (unless the filename is - in which case --- an error will be raised.) +-- | Read a Journal from the given CSV data (and filename, used for error +-- messages), or return an error. Proceed as follows: +-- @ +-- 1. parse the CSV data +-- 2. identify the name of a file specifying conversion rules: either use +-- the name provided, derive it from the CSV filename, or raise an error +-- if the CSV filename is -. +-- 3. auto-create the rules file with default rules if it doesn't exist +-- 4. parse the rules file +-- 5. convert the CSV records to a journal using the rules +-- @ readJournalFromCsv :: Maybe FilePath -> FilePath -> String -> IO (Either String Journal) -readJournalFromCsv rulesfile csvfile csvdata = do - let usingStdin = csvfile == "-" - rulesfile' = case rulesfile of - Just f -> f - Nothing -> if usingStdin - then error' "please use --rules-file to specify a rules file when converting stdin" - else rulesFileFor csvfile - created <- ensureRulesFileExists rulesfile' - if created - then hPrintf stderr "creating default conversion rules file %s, edit this file for better results\n" rulesfile' - else hPrintf stderr "using conversion rules file %s\n" rulesfile' - rules <- liftM (either (error'.show) id) $ parseCsvRulesFile rulesfile' - - +readJournalFromCsv Nothing "-" _ = return $ Left "please use --rules-file when converting stdin" +readJournalFromCsv mrulesfile csvfile csvdata = + handle (\e -> return $ Left $ show (e :: IOException)) $ do csvparse <- parseCsv csvfile csvdata - let records = case csvparse of - Left e -> error' $ show e + let rs = case csvparse of + Left e -> throw $ userError $ show e Right rs -> filter (/= [""]) rs + badrecords = take 1 $ filter ((< 2).length) rs + records = case badrecords of + [] -> rs + (_:_) -> throw $ userError $ "Parse error: at least one CSV record has less than two fields:\n"++(show $ head badrecords) - let requiredfields = max 2 (maxFieldIndex rules + 1) + let rulesfile = fromMaybe (rulesFileFor csvfile) mrulesfile + created <- records `seq` (trace "ensureRulesFile" $ ensureRulesFileExists rulesfile) + if created + then hPrintf stderr "creating default conversion rules file %s, edit this file for better results\n" rulesfile + else hPrintf stderr "using conversion rules file %s\n" rulesfile + + rules <- liftM (either (throw.userError.show) id) $ parseCsvRulesFile rulesfile + + let requiredfields = (maxFieldIndex rules + 1) badrecords = take 1 $ filter ((< requiredfields).length) records - if null badrecords - then do - return $ Right nulljournal{jtxns=sortBy (comparing tdate) $ map (transactionFromCsvRecord rules) records} - else - return $ Left (unlines [ - "Warning, at least one CSV record does not contain a field referenced by the" - ,"conversion rules file, or has less than two fields. Are you converting a" - ,"valid CSV file ? First bad record:" - , show $ head badrecords - ]) + return $ case badrecords of + [] -> Right nulljournal{jtxns=sortBy (comparing tdate) $ map (transactionFromCsvRecord rules) records} + (_:_) -> Left $ "Parse error: at least one CSV record does not contain a field referenced by the conversion rules file:\n"++(show $ head badrecords) -- | Ensure there is a conversion rules file at the given path, creating a -- default one if needed and returning True in this case. diff --git a/hledger-lib/Hledger/Read/JournalReader.hs b/hledger-lib/Hledger/Read/JournalReader.hs index ebee971a3..ca1508856 100644 --- a/hledger-lib/Hledger/Read/JournalReader.hs +++ b/hledger-lib/Hledger/Read/JournalReader.hs @@ -66,12 +66,13 @@ format = "journal" -- | Does the given file path and data provide hledger's journal file format ? detect :: FilePath -> String -> Bool -detect f _ = takeExtension f == format +detect f _ = takeExtension f `elem` ['.':format, ".j"] -- | Parse and post-process a "Journal" from hledger's journal file -- format, or give an error. parse :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal -parse _ = parseJournalWith journal +parse _ = -- trace ("running "++format++" reader") . + parseJournalWith journal -- parsing utils diff --git a/hledger-lib/Hledger/Read/TimelogReader.hs b/hledger-lib/Hledger/Read/TimelogReader.hs index 51f13dad3..18a51b15b 100644 --- a/hledger-lib/Hledger/Read/TimelogReader.hs +++ b/hledger-lib/Hledger/Read/TimelogReader.hs @@ -70,13 +70,14 @@ format = "timelog" -- | Does the given file path and data provide timeclock.el's timelog format ? detect :: FilePath -> String -> Bool -detect f _ = takeExtension f == format +detect f _ = takeExtension f == '.':format -- | Parse and post-process a "Journal" from timeclock.el's timelog -- format, saving the provided file path and the current time, or give an -- error. parse :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal -parse _ = parseJournalWith timelogFile +parse _ = -- trace ("running "++format++" reader") . + parseJournalWith timelogFile timelogFile :: GenParser Char JournalContext (JournalUpdate,JournalContext) timelogFile = do items <- many timelogItem