From b6ff17068858d2148b9305506928fb7c24d3d8ca Mon Sep 17 00:00:00 2001 From: Simon Michael Date: Fri, 18 Nov 2016 13:24:57 -0800 Subject: [PATCH] lib: simplify format detection, avoid ledger reader by default When we don't know a file's format, instead of choosing a subset of readers based on content sniffing, now we just try them all. Also, LedgerReader is now used only as a last resort, as it's not yet competitive with JournalReader. --- hledger-lib/Hledger/Data/Types.hs | 15 ++-- hledger-lib/Hledger/Read.hs | 78 ++++++--------------- hledger-lib/Hledger/Read/CsvReader.hs | 17 ++--- hledger-lib/Hledger/Read/JournalReader.hs | 20 ++---- hledger-lib/Hledger/Read/LedgerReader.hs | 29 ++------ hledger-lib/Hledger/Read/TimeclockReader.hs | 18 ++--- hledger-lib/Hledger/Read/TimedotReader.hs | 19 ++--- 7 files changed, 61 insertions(+), 135 deletions(-) diff --git a/hledger-lib/Hledger/Data/Types.hs b/hledger-lib/Hledger/Data/Types.hs index 10f76751c..12b39a2c5 100644 --- a/hledger-lib/Hledger/Data/Types.hs +++ b/hledger-lib/Hledger/Data/Types.hs @@ -319,12 +319,19 @@ type StorageFormat = String -- | A hledger journal reader is a triple of storage format name, a -- detector of that format, and a parser from that format to Journal. data Reader = Reader { - -- name of the format this reader handles + + -- The canonical name of the format handled by this reader rFormat :: StorageFormat - -- quickly check if this reader can probably handle the given file path and file content - ,rDetector :: FilePath -> Text -> Bool - -- parse the given string, using the given parse rules file if any, returning a journal or error aware of the given file path + + -- The file extensions recognised as containing this format + ,rExtensions :: [String] + + -- A text parser for this format, accepting an optional rules file, + -- assertion-checking flag, and file path for error messages, + -- producing an exception-raising IO action that returns a journal + -- or error message. 
,rParser :: Maybe FilePath -> Bool -> FilePath -> Text -> ExceptT String IO Journal + } instance Show Reader where show r = rFormat r ++ " reader" diff --git a/hledger-lib/Hledger/Read.hs b/hledger-lib/Hledger/Read.hs index 539a61e93..a560c47b1 100644 --- a/hledger-lib/Hledger/Read.hs +++ b/hledger-lib/Hledger/Read.hs @@ -12,9 +12,7 @@ readJournalFiles readJournalFile requireJournalFileExists readJournal - readersFor - readerForStorageFormat - readersForPathAndData + findReader tryReaders @ @@ -34,20 +32,9 @@ module Hledger.Read ( -- * Journal parsing readJournal, - readersFor, - readerForStorageFormat, - readersForPathAndData, - tryReaders, readJournal', - readFormatNames, -- * Re-exported - -- accountnamep, - -- amountp, - -- amountp', - -- mamountp', - -- numberp, - -- codep, accountaliasp, postingp, module Hledger.Read.Common, @@ -64,10 +51,11 @@ import Data.List import Data.Maybe import Data.Text (Text) import qualified Data.Text as T +import Safe import System.Directory (doesFileExist, getHomeDirectory) import System.Environment (getEnv) import System.Exit (exitFailure) -import System.FilePath ((</>)) +import System.FilePath ((</>), takeExtension) import System.IO (stderr) import Test.HUnit import Text.Printf @@ -85,20 +73,16 @@ import Prelude hiding (getContents, writeFile) import Hledger.Utils.UTF8IOCompat (writeFile) --- The available data file readers, each one handling a particular data --- format. The first is also used as the default for unknown formats. +-- The available journal readers, each one handling a particular data format. 
readers :: [Reader] readers = [ JournalReader.reader - ,LedgerReader.reader ,TimeclockReader.reader ,TimedotReader.reader ,CsvReader.reader + ,LedgerReader.reader ] -readFormatNames :: [StorageFormat] -readFormatNames = map rFormat readers - journalEnvVar = "LEDGER_FILE" journalEnvVar2 = "LEDGER" journalDefaultFilename = ".hledger.journal" @@ -192,44 +176,28 @@ tests_readJournal' = [ -- | @readJournal mformat mrulesfile assrt mpath t@ -- --- Read a journal from this string, trying whatever readers seem appropriate: --- --- - if a format is specified, try that reader only --- --- - or if one or more readers recognises the file path and data, try those --- --- - otherwise, try them all. --- --- A CSV conversion rules file may also be specified for use by the CSV reader. --- Also there is a flag specifying whether to check or ignore balance assertions in the journal. +-- Try to read a Journal from some text. +-- If a format is specified (mformat), try only that reader. +-- Otherwise if the file path is provided (mpath), and it specifies a format, try only that reader. +-- Otherwise try all readers in turn until one succeeds, or return the first error if none of them succeed. +-- A CSV conversion rules file may be specified (mrulesfile) for use by the CSV reader. +-- If the assrt flag is true, also check and enforce balance assertions in the journal. readJournal :: Maybe StorageFormat -> Maybe FilePath -> Bool -> Maybe FilePath -> Text -> IO (Either String Journal) readJournal mformat mrulesfile assrt mpath t = - let rs = readersFor (mformat, mpath, t) - in tryReaders rs mrulesfile assrt mpath t + let rs = maybe readers (:[]) $ findReader mformat mpath + in tryReaders rs mrulesfile assrt mpath t --- | @readersFor (format,path,t)@ +-- | @findReader mformat mpath@ -- --- Which readers are worth trying for this (possibly unspecified) format, filepath, and data ? 
-readersFor :: (Maybe StorageFormat, Maybe FilePath, Text) -> [Reader] -readersFor (format,path,t) = - dbg1 ("possible readers for "++show (format,path,textElideRight 30 t)) $ - case format of - Just f -> case readerForStorageFormat f of Just r -> [r] - Nothing -> [] - Nothing -> case path of Nothing -> readers - Just p -> case readersForPathAndData (p,t) of [] -> readers - rs -> rs - --- | Find the (first) reader which can handle the given format, if any. -readerForStorageFormat :: StorageFormat -> Maybe Reader -readerForStorageFormat s | null rs = Nothing - | otherwise = Just $ head rs - where - rs = filter ((s==).rFormat) readers :: [Reader] - --- | Find the readers which think they can handle the given file path and data, if any. -readersForPathAndData :: (FilePath,Text) -> [Reader] -readersForPathAndData (f,t) = filter (\r -> dbg1 ("try "++rFormat r++" format") $ (rDetector r) f t) readers +-- Find the reader for the given format (mformat), if any. +-- Or if no format is provided, find the first reader that handles the +-- file name's extension, if any. +findReader :: Maybe StorageFormat -> Maybe FilePath -> Maybe Reader +findReader Nothing Nothing = Nothing +findReader (Just fmt) _ = headMay [r | r <- readers, fmt == rFormat r] +findReader Nothing (Just path) = headMay [r | r <- readers, ext `elem` rExtensions r] + where + ext = drop 1 $ takeExtension path -- | @tryReaders readers mrulesfile assrt path t@ -- diff --git a/hledger-lib/Hledger/Read/CsvReader.hs b/hledger-lib/Hledger/Read/CsvReader.hs index 209df6ccb..20e0e9abc 100644 --- a/hledger-lib/Hledger/Read/CsvReader.hs +++ b/hledger-lib/Hledger/Read/CsvReader.hs @@ -64,18 +64,11 @@ import Hledger.Read.Common (amountp, statusp, genericSourcePos) reader :: Reader -reader = Reader format detect parse - -format :: String -format = "csv" - --- | Does the given file path and data look like something this reader can handle ? 
-detect :: FilePath -> Text -> Bool -detect f excerpt - -- file name known: try this reader if it has any of these extensions - | f /= "-" = takeExtension f `elem` ['.':format] - -- file name unknown: try this reader if excerpt contains two or more commas - | otherwise = T.length (T.filter (==',') excerpt) >= 2 +reader = Reader + {rFormat = "csv" + ,rExtensions = ["csv"] + ,rParser = parse + } -- | Parse and post-process a "Journal" from CSV data, or give an error. -- XXX currently ignores the string and reads from the file path diff --git a/hledger-lib/Hledger/Read/JournalReader.hs b/hledger-lib/Hledger/Read/JournalReader.hs index 4733ee28a..bb63f54eb 100644 --- a/hledger-lib/Hledger/Read/JournalReader.hs +++ b/hledger-lib/Hledger/Read/JournalReader.hs @@ -106,21 +106,11 @@ import Hledger.Utils --- * reader reader :: Reader -reader = Reader format detect parse - -format :: String -format = "journal" - --- | Does the given file path and data look like something this reader can handle ? -detect :: FilePath -> Text -> Bool -detect f _ - -- file name known: try this reader if it has any of these extensions - | f /= "-" = takeExtension f `elem` ['.':format, ".j", ".hledger", ".ledger", ".l"] - -- file name unknown: always try this reader - | otherwise = True - -- file name unknown: try this reader if we can see something like a journal entry - -- (digits in column 0 with the next line indented) - -- otherwise = regexMatches "(^|\n)[0-9]+.*\n[ \t]+" $ T.unpack excerpt +reader = Reader + {rFormat = "journal" + ,rExtensions = ["journal", "j", "hledger", "ledger"] + ,rParser = parse + } -- | Parse and post-process a "Journal" from hledger's journal file -- format, or give an error. 
diff --git a/hledger-lib/Hledger/Read/LedgerReader.hs b/hledger-lib/Hledger/Read/LedgerReader.hs index 8e9ecf97d..9b3e502dc 100644 --- a/hledger-lib/Hledger/Read/LedgerReader.hs +++ b/hledger-lib/Hledger/Read/LedgerReader.hs @@ -14,19 +14,11 @@ where --- * imports import Prelude () import Prelude.Compat hiding (readFile) --- import qualified Control.Exception as C -import Control.Monad import Control.Monad.IO.Class (liftIO) import Control.Monad.Except (ExceptT(..), throwError) --- import Control.Monad.State.Strict --- import qualified Data.Map.Strict as M import Data.Maybe --- import Data.List import Data.Text (Text, pack) import Data.Text.Encoding (encodeUtf8) --- import qualified Data.Text as T --- import Data.Time.Calendar --- import Data.Time.LocalTime -- import Safe import Test.HUnit -- #ifdef TESTS @@ -35,7 +27,6 @@ import Test.HUnit -- #endif import Text.Megaparsec (eof) -- import Text.Printf -import System.FilePath import System.Time import qualified Filesystem.Path.CurrentOS as F @@ -51,20 +42,14 @@ import Text.Trifecta.Result (Result(..)) --- * reader reader :: Reader -reader = Reader format detect parse +reader = Reader + {rFormat = "ledger" + ,rExtensions = [] + ,rParser = parse + } -format :: String -format = "ledger" - --- | Does the given file path and data look like something this reader can handle ? -detect :: FilePath -> Text -> Bool -detect f _ - -- file name known: try this reader if it has any of these extensions - | f /= "-" = takeExtension f `elem` ['.':format, ".l"] - -- file name unknown: don't try this reader - | otherwise = False - --- | Parse and post-process a "Journal" from ledger's journal format, or give an error. +-- | Generate an action that parses and post-processes a "Journal" from a +-- C++ Ledger journal, or raises an error. 
parse :: Maybe FilePath -> Bool -> FilePath -> Text -> ExceptT String IO Journal parse _mrulespath assrt path txt = do let diff --git a/hledger-lib/Hledger/Read/TimeclockReader.hs b/hledger-lib/Hledger/Read/TimeclockReader.hs index 3503decd7..33ac8fe5b 100644 --- a/hledger-lib/Hledger/Read/TimeclockReader.hs +++ b/hledger-lib/Hledger/Read/TimeclockReader.hs @@ -61,7 +61,6 @@ import Data.Text (Text) import qualified Data.Text as T import Test.HUnit import Text.Megaparsec hiding (parse) -import System.FilePath import Hledger.Data -- XXX too much reuse ? @@ -70,18 +69,11 @@ import Hledger.Utils reader :: Reader -reader = Reader format detect parse - -format :: String -format = "timeclock" - --- | Does the given file path and data look like something this reader can handle ? -detect :: FilePath -> Text -> Bool -detect f excerpt - -- file name known: try this reader if it has any of these extensions - | f /= "-" = takeExtension f `elem` ['.':format] - -- file name unknown: try this reader if a line starts with "i " or "o " in excerpt - | otherwise = regexMatches "(^|\n)[io] " $ T.unpack excerpt +reader = Reader + {rFormat = "timeclock" + ,rExtensions = ["timeclock"] + ,rParser = parse + } -- | Parse and post-process a "Journal" from timeclock.el's timeclock -- format, saving the provided file path and the current time, or give an diff --git a/hledger-lib/Hledger/Read/TimedotReader.hs b/hledger-lib/Hledger/Read/TimedotReader.hs index de31e9a1d..b23f4847f 100644 --- a/hledger-lib/Hledger/Read/TimedotReader.hs +++ b/hledger-lib/Hledger/Read/TimedotReader.hs @@ -41,10 +41,8 @@ import Data.Char (isSpace) import Data.List (foldl') import Data.Maybe import Data.Text (Text) -import qualified Data.Text as T import Test.HUnit import Text.Megaparsec hiding (parse) -import System.FilePath import Hledger.Data import Hledger.Read.Common @@ -56,18 +54,11 @@ import Hledger.Utils hiding (ptrace) ptrace = return reader :: Reader -reader = Reader format detect parse - -format :: String 
-format = "timedot" - --- | Does the given file path and data look like something this reader can handle ? -detect :: FilePath -> Text -> Bool -detect f excerpt - -- file name known: try this reader if it has any of these extensions - | f /= "-" = takeExtension f `elem` ['.':format] - -- file name unknown: try this reader if a line starts with a number in excerpt - | otherwise = regexMatches "(^|\n)[0-9]" $ T.unpack excerpt +reader = Reader + {rFormat = "timedot" + ,rExtensions = ["timedot"] + ,rParser = parse + } -- | Parse and post-process a "Journal" from the timedot format, or give an error. parse :: Maybe FilePath -> Bool -> FilePath -> Text -> ExceptT String IO Journal