clean up reader selection, don't write a csv rules file on journal parse error

This commit is contained in:
Simon Michael 2012-05-28 18:40:36 +00:00
parent 0a5f1f5689
commit 1062e2f9a4
5 changed files with 90 additions and 89 deletions

View File

@ -191,6 +191,8 @@ data Reader = Reader {
,rParser :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal ,rParser :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal
} }
instance Show Reader where show r = "Reader for "++rFormat r
-- data format parse/conversion rules -- data format parse/conversion rules
-- currently the only parse (conversion) rules are those for the CSV format -- currently the only parse (conversion) rules are those for the CSV format

View File

@ -84,14 +84,6 @@ defaultJournalPath = do
defaultJournal :: IO Journal defaultJournal :: IO Journal
defaultJournal = defaultJournalPath >>= readJournalFile Nothing Nothing >>= either error' return defaultJournal = defaultJournalPath >>= readJournalFile Nothing Nothing >>= either error' return
-- | Find the reader which can handle the given format, if any.
-- Typically there is just one; only the first is returned.
readerForFormat :: Format -> Maybe Reader
readerForFormat s | null rs = Nothing
| otherwise = Just $ head rs
where
rs = filter ((s==).rFormat) readers :: [Reader]
-- | Read a journal from the given string, trying all known formats, or simply throw an error. -- | Read a journal from the given string, trying all known formats, or simply throw an error.
readJournal' :: String -> IO Journal readJournal' :: String -> IO Journal
readJournal' s = readJournal Nothing Nothing Nothing s >>= either error' return readJournal' s = readJournal Nothing Nothing Nothing s >>= either error' return
@ -103,52 +95,56 @@ tests_readJournal' = [
] ]
-- | Read a Journal from this string or give an error message, using the
-- specified data format or trying all known formats. A CSV conversion -- | Read a journal from this string, trying whatever readers seem appropriate:
-- rules file may be specified for better conversion of that format, --
-- and/or a file path for better error messages. -- - if a format is specified, try that reader only
--
-- - or if one or more readers recognises the file path and data, try those
--
-- - otherwise, try them all.
--
-- A CSV conversion rules file may also be specified for use by the CSV reader.
readJournal :: Maybe Format -> Maybe FilePath -> Maybe FilePath -> String -> IO (Either String Journal) readJournal :: Maybe Format -> Maybe FilePath -> Maybe FilePath -> String -> IO (Either String Journal)
readJournal format rulesfile path s = readJournal format rulesfile path s =
let readerstotry = case format of Nothing -> readers -- trace (show (format, rulesfile, path)) $
Just f -> case readerForFormat f of Just r -> [r] tryReaders $ readersFor (format, path, s)
Nothing -> []
in firstSuccessOrBestError $ map tryReader readerstotry
where where
path' = fromMaybe "(string)" path -- try each reader in turn, returning the error of the first if all fail
tryReader :: Reader -> IO (Either String Journal) tryReaders :: [Reader] -> IO (Either String Journal)
tryReader r = do -- printf "trying %s reader\n" (rFormat r) tryReaders = firstSuccessOrBestError []
(runErrorT . (rParser r) rulesfile path') s
-- if no reader succeeds, we return the error of the first;
-- ideally it would be the error of the most likely intended
-- reader, based on file suffix and/or data sniffing.
firstSuccessOrBestError :: [IO (Either String Journal)] -> IO (Either String Journal)
firstSuccessOrBestError [] = return $ Left "no readers found"
firstSuccessOrBestError attempts = firstSuccessOrBestError' attempts
where where
firstSuccessOrBestError' [] = head attempts firstSuccessOrBestError :: [String] -> [Reader] -> IO (Either String Journal)
firstSuccessOrBestError' (a:as) = do firstSuccessOrBestError [] [] = return $ Left "no readers found"
r <- a firstSuccessOrBestError errs (r:rs) = do
case r of Right j -> return $ Right j -- printf "trying %s reader\n" (rFormat r)
Left _ -> firstSuccessOrBestError' as result <- (runErrorT . (rParser r) rulesfile path') s
case result of Right j -> return $ Right j -- success!
Left e -> firstSuccessOrBestError (errs++[e]) rs -- keep trying
firstSuccessOrBestError (e:_) [] = return $ Left e -- none left, return first error
path' = fromMaybe "(string)" path
-- -- unknown format -- | Which readers are worth trying for this (possibly unspecified) format, filepath, and data ?
-- bestErrorMsg :: [String] -> String -> Maybe FilePath -> String readersFor :: (Maybe Format, Maybe FilePath, String) -> [Reader]
-- bestErrorMsg [] _ path = printf "could not parse %sdata%s" fmts pathmsg readersFor (format,path,s) =
-- where fmts = case formats of case format of
-- [] -> "" Just f -> case readerForFormat f of Just r -> [r]
-- [f] -> f ++ " " Nothing -> []
-- fs -> intercalate ", " (init fs) ++ " or " ++ last fs ++ " " Nothing -> case path of Nothing -> readers
-- pathmsg = case path of Just "-" -> readers
-- Nothing -> "" Just p -> case readersForPathAndData (p,s) of [] -> readers
-- Just p -> " in "++p rs -> rs
-- -- one or more errors - report (the most appropriate ?) one
-- bestErrorMsg es s path = printf "could not parse %s data%s\n%s" (rFormat r) pathmsg e -- | Find the (first) reader which can handle the given format, if any.
-- where (r,e) = headDef (head readers, head es) $ filter detects $ zip readers es readerForFormat :: Format -> Maybe Reader
-- detects (r,_) = (rDetector r) path' s readerForFormat s | null rs = Nothing
-- pathmsg = case path of | otherwise = Just $ head rs
-- Nothing -> "" where
-- Just p -> " in "++p rs = filter ((s==).rFormat) readers :: [Reader]
-- | Find the readers which think they can handle the given file path and data, if any.
readersForPathAndData :: (FilePath,String) -> [Reader]
readersForPathAndData (f,s) = filter (\r -> (rDetector r) f s) readers
-- | Read a Journal from this file (or stdin if the filename is -) or give -- | Read a Journal from this file (or stdin if the filename is -) or give
-- an error message, using the specified data format or trying all known -- an error message, using the specified data format or trying all known

View File

@ -30,6 +30,7 @@ module Hledger.Read.CsvReader (
tests_Hledger_Read_CsvReader tests_Hledger_Read_CsvReader
) )
where where
import Control.Exception hiding (try)
import Control.Monad import Control.Monad
import Control.Monad.Error import Control.Monad.Error
-- import Test.HUnit -- import Test.HUnit
@ -44,7 +45,7 @@ import System.IO (stderr)
import System.Locale (defaultTimeLocale) import System.Locale (defaultTimeLocale)
import Test.HUnit import Test.HUnit
import Text.CSV (parseCSV, CSV) import Text.CSV (parseCSV, CSV)
import Text.ParserCombinators.Parsec hiding (parse) import Text.ParserCombinators.Parsec hiding (parse)
import Text.ParserCombinators.Parsec.Error import Text.ParserCombinators.Parsec.Error
import Text.ParserCombinators.Parsec.Pos import Text.ParserCombinators.Parsec.Pos
import Text.Printf (hPrintf) import Text.Printf (hPrintf)
@ -65,12 +66,13 @@ format = "csv"
-- | Does the given file path and data look like CSV ? -- | Does the given file path and data look like CSV ?
detect :: FilePath -> String -> Bool detect :: FilePath -> String -> Bool
detect f _ = takeExtension f == format detect f _ = takeExtension f == '.':format
-- | Parse and post-process a "Journal" from CSV data, or give an error. -- | Parse and post-process a "Journal" from CSV data, or give an error.
-- XXX currently ignores the string and reads from the file path -- XXX currently ignores the string and reads from the file path
parse :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal parse :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal
parse rulesfile f s = do parse rulesfile f s = -- trace ("running "++format++" reader") $
do
r <- liftIO $ readJournalFromCsv rulesfile f s r <- liftIO $ readJournalFromCsv rulesfile f s
case r of Left e -> throwError e case r of Left e -> throwError e
Right j -> return j Right j -> return j
@ -96,44 +98,43 @@ nullrules = CsvRules {
type CsvRecord = [String] type CsvRecord = [String]
-- | Read a Journal or an error message from the given CSV data (and -- | Read a Journal from the given CSV data (and filename, used for error
-- filename, used for error messages.) To do this we read a CSV -- messages), or return an error. Proceed as follows:
-- conversion rules file, or auto-create a default one if it does not -- @
-- exist. The rules filename may be specified, otherwise it will be -- 1. parse the CSV data
-- derived from the CSV filename (unless the filename is - in which case -- 2. identify the name of a file specifying conversion rules: either use
-- an error will be raised.) -- the name provided, derive it from the CSV filename, or raise an error
-- if the CSV filename is -.
-- 3. auto-create the rules file with default rules if it doesn't exist
-- 4. parse the rules file
-- 5. convert the CSV records to a journal using the rules
-- @
readJournalFromCsv :: Maybe FilePath -> FilePath -> String -> IO (Either String Journal) readJournalFromCsv :: Maybe FilePath -> FilePath -> String -> IO (Either String Journal)
readJournalFromCsv rulesfile csvfile csvdata = do readJournalFromCsv Nothing "-" _ = return $ Left "please use --rules-file when converting stdin"
let usingStdin = csvfile == "-" readJournalFromCsv mrulesfile csvfile csvdata =
rulesfile' = case rulesfile of handle (\e -> return $ Left $ show (e :: IOException)) $ do
Just f -> f
Nothing -> if usingStdin
then error' "please use --rules-file to specify a rules file when converting stdin"
else rulesFileFor csvfile
created <- ensureRulesFileExists rulesfile'
if created
then hPrintf stderr "creating default conversion rules file %s, edit this file for better results\n" rulesfile'
else hPrintf stderr "using conversion rules file %s\n" rulesfile'
rules <- liftM (either (error'.show) id) $ parseCsvRulesFile rulesfile'
csvparse <- parseCsv csvfile csvdata csvparse <- parseCsv csvfile csvdata
let records = case csvparse of let rs = case csvparse of
Left e -> error' $ show e Left e -> throw $ userError $ show e
Right rs -> filter (/= [""]) rs Right rs -> filter (/= [""]) rs
badrecords = take 1 $ filter ((< 2).length) rs
records = case badrecords of
[] -> rs
(_:_) -> throw $ userError $ "Parse error: at least one CSV record has less than two fields:\n"++(show $ head badrecords)
let requiredfields = max 2 (maxFieldIndex rules + 1) let rulesfile = fromMaybe (rulesFileFor csvfile) mrulesfile
created <- records `seq` (trace "ensureRulesFile" $ ensureRulesFileExists rulesfile)
if created
then hPrintf stderr "creating default conversion rules file %s, edit this file for better results\n" rulesfile
else hPrintf stderr "using conversion rules file %s\n" rulesfile
rules <- liftM (either (throw.userError.show) id) $ parseCsvRulesFile rulesfile
let requiredfields = (maxFieldIndex rules + 1)
badrecords = take 1 $ filter ((< requiredfields).length) records badrecords = take 1 $ filter ((< requiredfields).length) records
if null badrecords return $ case badrecords of
then do [] -> Right nulljournal{jtxns=sortBy (comparing tdate) $ map (transactionFromCsvRecord rules) records}
return $ Right nulljournal{jtxns=sortBy (comparing tdate) $ map (transactionFromCsvRecord rules) records} (_:_) -> Left $ "Parse error: at least one CSV record does not contain a field referenced by the conversion rules file:\n"++(show $ head badrecords)
else
return $ Left (unlines [
"Warning, at least one CSV record does not contain a field referenced by the"
,"conversion rules file, or has less than two fields. Are you converting a"
,"valid CSV file ? First bad record:"
, show $ head badrecords
])
-- | Ensure there is a conversion rules file at the given path, creating a -- | Ensure there is a conversion rules file at the given path, creating a
-- default one if needed and returning True in this case. -- default one if needed and returning True in this case.

View File

@ -66,12 +66,13 @@ format = "journal"
-- | Does the given file path and data provide hledger's journal file format ? -- | Does the given file path and data provide hledger's journal file format ?
detect :: FilePath -> String -> Bool detect :: FilePath -> String -> Bool
detect f _ = takeExtension f == format detect f _ = takeExtension f `elem` ['.':format, ".j"]
-- | Parse and post-process a "Journal" from hledger's journal file -- | Parse and post-process a "Journal" from hledger's journal file
-- format, or give an error. -- format, or give an error.
parse :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal parse :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal
parse _ = parseJournalWith journal parse _ = -- trace ("running "++format++" reader") .
parseJournalWith journal
-- parsing utils -- parsing utils

View File

@ -70,13 +70,14 @@ format = "timelog"
-- | Does the given file path and data provide timeclock.el's timelog format ? -- | Does the given file path and data provide timeclock.el's timelog format ?
detect :: FilePath -> String -> Bool detect :: FilePath -> String -> Bool
detect f _ = takeExtension f == format detect f _ = takeExtension f == '.':format
-- | Parse and post-process a "Journal" from timeclock.el's timelog -- | Parse and post-process a "Journal" from timeclock.el's timelog
-- format, saving the provided file path and the current time, or give an -- format, saving the provided file path and the current time, or give an
-- error. -- error.
parse :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal parse :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal
parse _ = parseJournalWith timelogFile parse _ = -- trace ("running "++format++" reader") .
parseJournalWith timelogFile
timelogFile :: GenParser Char JournalContext (JournalUpdate,JournalContext) timelogFile :: GenParser Char JournalContext (JournalUpdate,JournalContext)
timelogFile = do items <- many timelogItem timelogFile = do items <- many timelogItem