clean up reader selection, don't write a csv rules file on journal parse error

This commit is contained in:
Simon Michael 2012-05-28 18:40:36 +00:00
parent 0a5f1f5689
commit 1062e2f9a4
5 changed files with 90 additions and 89 deletions

View File

@ -191,6 +191,8 @@ data Reader = Reader {
,rParser :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal
}
instance Show Reader where show r = "Reader for "++rFormat r
-- data format parse/conversion rules
-- currently the only parse (conversion) rules are those for the CSV format

View File

@ -84,14 +84,6 @@ defaultJournalPath = do
defaultJournal :: IO Journal
defaultJournal = defaultJournalPath >>= readJournalFile Nothing Nothing >>= either error' return
-- | Find the reader which can handle the given format, if any.
-- Typically there is just one; only the first is returned.
readerForFormat :: Format -> Maybe Reader
readerForFormat s | null rs = Nothing
| otherwise = Just $ head rs
where
rs = filter ((s==).rFormat) readers :: [Reader]
-- | Read a journal from the given string, trying all known formats, or simply throw an error.
readJournal' :: String -> IO Journal
readJournal' s = readJournal Nothing Nothing Nothing s >>= either error' return
@ -103,52 +95,56 @@ tests_readJournal' = [
]
-- | Read a Journal from this string or give an error message, using the
-- specified data format or trying all known formats. A CSV conversion
-- rules file may be specified for better conversion of that format,
-- and/or a file path for better error messages.
-- | Read a journal from this string, trying whatever readers seem appropriate:
--
-- - if a format is specified, try that reader only
--
-- - or if one or more readers recognises the file path and data, try those
--
-- - otherwise, try them all.
--
-- A CSV conversion rules file may also be specified for use by the CSV reader.
readJournal :: Maybe Format -> Maybe FilePath -> Maybe FilePath -> String -> IO (Either String Journal)
readJournal format rulesfile path s =
let readerstotry = case format of Nothing -> readers
Just f -> case readerForFormat f of Just r -> [r]
Nothing -> []
in firstSuccessOrBestError $ map tryReader readerstotry
-- trace (show (format, rulesfile, path)) $
tryReaders $ readersFor (format, path, s)
where
path' = fromMaybe "(string)" path
tryReader :: Reader -> IO (Either String Journal)
tryReader r = do -- printf "trying %s reader\n" (rFormat r)
(runErrorT . (rParser r) rulesfile path') s
-- if no reader succeeds, we return the error of the first;
-- ideally it would be the error of the most likely intended
-- reader, based on file suffix and/or data sniffing.
firstSuccessOrBestError :: [IO (Either String Journal)] -> IO (Either String Journal)
firstSuccessOrBestError [] = return $ Left "no readers found"
firstSuccessOrBestError attempts = firstSuccessOrBestError' attempts
-- try each reader in turn, returning the error of the first if all fail
tryReaders :: [Reader] -> IO (Either String Journal)
tryReaders = firstSuccessOrBestError []
where
firstSuccessOrBestError' [] = head attempts
firstSuccessOrBestError' (a:as) = do
r <- a
case r of Right j -> return $ Right j
Left _ -> firstSuccessOrBestError' as
firstSuccessOrBestError :: [String] -> [Reader] -> IO (Either String Journal)
firstSuccessOrBestError [] [] = return $ Left "no readers found"
firstSuccessOrBestError errs (r:rs) = do
-- printf "trying %s reader\n" (rFormat r)
result <- (runErrorT . (rParser r) rulesfile path') s
case result of Right j -> return $ Right j -- success!
Left e -> firstSuccessOrBestError (errs++[e]) rs -- keep trying
firstSuccessOrBestError (e:_) [] = return $ Left e -- none left, return first error
path' = fromMaybe "(string)" path
-- -- unknown format
-- bestErrorMsg :: [String] -> String -> Maybe FilePath -> String
-- bestErrorMsg [] _ path = printf "could not parse %sdata%s" fmts pathmsg
-- where fmts = case formats of
-- [] -> ""
-- [f] -> f ++ " "
-- fs -> intercalate ", " (init fs) ++ " or " ++ last fs ++ " "
-- pathmsg = case path of
-- Nothing -> ""
-- Just p -> " in "++p
-- -- one or more errors - report (the most appropriate ?) one
-- bestErrorMsg es s path = printf "could not parse %s data%s\n%s" (rFormat r) pathmsg e
-- where (r,e) = headDef (head readers, head es) $ filter detects $ zip readers es
-- detects (r,_) = (rDetector r) path' s
-- pathmsg = case path of
-- Nothing -> ""
-- Just p -> " in "++p
-- | Which readers are worth trying for this (possibly unspecified) format, filepath, and data ?
readersFor :: (Maybe Format, Maybe FilePath, String) -> [Reader]
readersFor (format,path,s) =
case format of
Just f -> case readerForFormat f of Just r -> [r]
Nothing -> []
Nothing -> case path of Nothing -> readers
Just "-" -> readers
Just p -> case readersForPathAndData (p,s) of [] -> readers
rs -> rs
-- | Find the (first) reader which can handle the given format, if any.
readerForFormat :: Format -> Maybe Reader
readerForFormat s | null rs = Nothing
| otherwise = Just $ head rs
where
rs = filter ((s==).rFormat) readers :: [Reader]
-- | Find the readers which think they can handle the given file path and data, if any.
readersForPathAndData :: (FilePath,String) -> [Reader]
readersForPathAndData (f,s) = filter (\r -> (rDetector r) f s) readers
-- | Read a Journal from this file (or stdin if the filename is -) or give
-- an error message, using the specified data format or trying all known

View File

@ -30,6 +30,7 @@ module Hledger.Read.CsvReader (
tests_Hledger_Read_CsvReader
)
where
import Control.Exception hiding (try)
import Control.Monad
import Control.Monad.Error
-- import Test.HUnit
@ -44,7 +45,7 @@ import System.IO (stderr)
import System.Locale (defaultTimeLocale)
import Test.HUnit
import Text.CSV (parseCSV, CSV)
import Text.ParserCombinators.Parsec hiding (parse)
import Text.ParserCombinators.Parsec hiding (parse)
import Text.ParserCombinators.Parsec.Error
import Text.ParserCombinators.Parsec.Pos
import Text.Printf (hPrintf)
@ -65,12 +66,13 @@ format = "csv"
-- | Does the given file path and data look like CSV ?
detect :: FilePath -> String -> Bool
detect f _ = takeExtension f == format
detect f _ = takeExtension f == '.':format
-- | Parse and post-process a "Journal" from CSV data, or give an error.
-- XXX currently ignores the string and reads from the file path
parse :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal
parse rulesfile f s = do
parse rulesfile f s = -- trace ("running "++format++" reader") $
do
r <- liftIO $ readJournalFromCsv rulesfile f s
case r of Left e -> throwError e
Right j -> return j
@ -96,44 +98,43 @@ nullrules = CsvRules {
type CsvRecord = [String]
-- | Read a Journal or an error message from the given CSV data (and
-- filename, used for error messages.) To do this we read a CSV
-- conversion rules file, or auto-create a default one if it does not
-- exist. The rules filename may be specified, otherwise it will be
-- derived from the CSV filename (unless the filename is - in which case
-- an error will be raised.)
-- | Read a Journal from the given CSV data (and filename, used for error
-- messages), or return an error. Proceed as follows:
-- @
-- 1. parse the CSV data
-- 2. identify the name of a file specifying conversion rules: either use
-- the name provided, derive it from the CSV filename, or raise an error
-- if the CSV filename is -.
-- 3. auto-create the rules file with default rules if it doesn't exist
-- 4. parse the rules file
-- 5. convert the CSV records to a journal using the rules
-- @
readJournalFromCsv :: Maybe FilePath -> FilePath -> String -> IO (Either String Journal)
readJournalFromCsv rulesfile csvfile csvdata = do
let usingStdin = csvfile == "-"
rulesfile' = case rulesfile of
Just f -> f
Nothing -> if usingStdin
then error' "please use --rules-file to specify a rules file when converting stdin"
else rulesFileFor csvfile
created <- ensureRulesFileExists rulesfile'
if created
then hPrintf stderr "creating default conversion rules file %s, edit this file for better results\n" rulesfile'
else hPrintf stderr "using conversion rules file %s\n" rulesfile'
rules <- liftM (either (error'.show) id) $ parseCsvRulesFile rulesfile'
readJournalFromCsv Nothing "-" _ = return $ Left "please use --rules-file when converting stdin"
readJournalFromCsv mrulesfile csvfile csvdata =
handle (\e -> return $ Left $ show (e :: IOException)) $ do
csvparse <- parseCsv csvfile csvdata
let records = case csvparse of
Left e -> error' $ show e
let rs = case csvparse of
Left e -> throw $ userError $ show e
Right rs -> filter (/= [""]) rs
badrecords = take 1 $ filter ((< 2).length) rs
records = case badrecords of
[] -> rs
(_:_) -> throw $ userError $ "Parse error: at least one CSV record has less than two fields:\n"++(show $ head badrecords)
let requiredfields = max 2 (maxFieldIndex rules + 1)
let rulesfile = fromMaybe (rulesFileFor csvfile) mrulesfile
created <- records `seq` (trace "ensureRulesFile" $ ensureRulesFileExists rulesfile)
if created
then hPrintf stderr "creating default conversion rules file %s, edit this file for better results\n" rulesfile
else hPrintf stderr "using conversion rules file %s\n" rulesfile
rules <- liftM (either (throw.userError.show) id) $ parseCsvRulesFile rulesfile
let requiredfields = (maxFieldIndex rules + 1)
badrecords = take 1 $ filter ((< requiredfields).length) records
if null badrecords
then do
return $ Right nulljournal{jtxns=sortBy (comparing tdate) $ map (transactionFromCsvRecord rules) records}
else
return $ Left (unlines [
"Warning, at least one CSV record does not contain a field referenced by the"
,"conversion rules file, or has less than two fields. Are you converting a"
,"valid CSV file ? First bad record:"
, show $ head badrecords
])
return $ case badrecords of
[] -> Right nulljournal{jtxns=sortBy (comparing tdate) $ map (transactionFromCsvRecord rules) records}
(_:_) -> Left $ "Parse error: at least one CSV record does not contain a field referenced by the conversion rules file:\n"++(show $ head badrecords)
-- | Ensure there is a conversion rules file at the given path, creating a
-- default one if needed and returning True in this case.

View File

@ -66,12 +66,13 @@ format = "journal"
-- | Does the given file path and data provide hledger's journal file format ?
detect :: FilePath -> String -> Bool
detect f _ = takeExtension f == format
detect f _ = takeExtension f `elem` ['.':format, ".j"]
-- | Parse and post-process a "Journal" from hledger's journal file
-- format, or give an error.
parse :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal
parse _ = parseJournalWith journal
parse _ = -- trace ("running "++format++" reader") .
parseJournalWith journal
-- parsing utils

View File

@ -70,13 +70,14 @@ format = "timelog"
-- | Does the given file path and data provide timeclock.el's timelog format ?
detect :: FilePath -> String -> Bool
detect f _ = takeExtension f == format
detect f _ = takeExtension f == '.':format
-- | Parse and post-process a "Journal" from timeclock.el's timelog
-- format, saving the provided file path and the current time, or give an
-- error.
parse :: Maybe FilePath -> FilePath -> String -> ErrorT String IO Journal
parse _ = parseJournalWith timelogFile
parse _ = -- trace ("running "++format++" reader") .
parseJournalWith timelogFile
timelogFile :: GenParser Char JournalContext (JournalUpdate,JournalContext)
timelogFile = do items <- many timelogItem