cleanups and early code for csv reader based on convert
This commit is contained in:
		
							parent
							
								
									2e8cf1c7f2
								
							
						
					
					
						commit
						4d7a809c4a
					
				| @ -36,6 +36,7 @@ import Hledger.Data.Types (Journal(..), Reader(..)) | |||||||
| import Hledger.Data.Journal (nullctx) | import Hledger.Data.Journal (nullctx) | ||||||
| import Hledger.Read.JournalReader as JournalReader | import Hledger.Read.JournalReader as JournalReader | ||||||
| import Hledger.Read.TimelogReader as TimelogReader | import Hledger.Read.TimelogReader as TimelogReader | ||||||
|  | import Hledger.Read.CsvReader as CsvReader | ||||||
| import Hledger.Utils | import Hledger.Utils | ||||||
| import Prelude hiding (getContents, writeFile) | import Prelude hiding (getContents, writeFile) | ||||||
| import Hledger.Utils.UTF8 (getContents, hGetContents, writeFile) | import Hledger.Utils.UTF8 (getContents, hGetContents, writeFile) | ||||||
| @ -51,6 +52,7 @@ readers :: [Reader] | |||||||
| readers = [ | readers = [ | ||||||
|   JournalReader.reader |   JournalReader.reader | ||||||
|  ,TimelogReader.reader |  ,TimelogReader.reader | ||||||
|  |  -- ,CsvReader.reader | ||||||
|  ] |  ] | ||||||
| 
 | 
 | ||||||
| -- | All the data formats we can read. | -- | All the data formats we can read. | ||||||
| @ -74,18 +76,22 @@ journalFromPathAndString format fp s = do | |||||||
|   let readerstotry = case format of Nothing -> readers |   let readerstotry = case format of Nothing -> readers | ||||||
|                                     Just f -> case readerForFormat f of Just r -> [r] |                                     Just f -> case readerForFormat f of Just r -> [r] | ||||||
|                                                                         Nothing -> [] |                                                                         Nothing -> [] | ||||||
|   (errors, journals) <- partitionEithers `fmap` mapM tryReader readerstotry |   (errors, journals) <- partitionEithers `fmap` mapM (tryReader fp s) readerstotry | ||||||
|   case journals of j:_ -> return $ Right j |   case journals of j:_ -> return $ Right j | ||||||
|                    _   -> return $ Left $ bestErrorMsg errors |                    _   -> return $ Left $ bestErrorMsg errors fp s | ||||||
|     where |     -- where | ||||||
|       tryReader r = (runErrorT . (rParser r) fp) s | 
 | ||||||
|  | tryReader :: FilePath -> String -> Reader -> IO (Either String Journal) | ||||||
|  | tryReader fp s r = do -- printf "trying to read %s format\n" (rFormat r) | ||||||
|  |                       (runErrorT . (rParser r) fp) s | ||||||
|  | 
 | ||||||
|       -- unknown format |       -- unknown format | ||||||
|       bestErrorMsg [] = printf "could not parse %sdata in %s" (fmt formats) fp | bestErrorMsg [] fp _ = printf "could not parse %sdata in %s" (fmt formats) fp | ||||||
|           where fmt [] = "" |           where fmt [] = "" | ||||||
|                 fmt [f] = f ++ " " |                 fmt [f] = f ++ " " | ||||||
|                 fmt fs = intercalate ", " (init fs) ++ " or " ++ last fs ++ " " |                 fmt fs = intercalate ", " (init fs) ++ " or " ++ last fs ++ " " | ||||||
|       -- one or more errors - report (the most appropriate ?) one |       -- one or more errors - report (the most appropriate ?) one | ||||||
|       bestErrorMsg es = printf "could not parse %s data in %s\n%s" (rFormat r) fp e | bestErrorMsg es fp s = printf "could not parse %s data in %s\n%s" (rFormat r) fp e | ||||||
|           where (r,e) = headDef (head readers, head es) $ filter detects $ zip readers es |           where (r,e) = headDef (head readers, head es) $ filter detects $ zip readers es | ||||||
|                 detects (r,_) = (rDetector r) fp s |                 detects (r,_) = (rDetector r) fp s | ||||||
| 
 | 
 | ||||||
| @ -152,6 +158,7 @@ tests_Hledger_Read = TestList | |||||||
|   [ |   [ | ||||||
|    tests_Hledger_Read_JournalReader, |    tests_Hledger_Read_JournalReader, | ||||||
|    tests_Hledger_Read_TimelogReader, |    tests_Hledger_Read_TimelogReader, | ||||||
|  |    tests_Hledger_Read_CsvReader, | ||||||
| 
 | 
 | ||||||
|    "journalFile" ~: do |    "journalFile" ~: do | ||||||
|     assertBool "journalFile should parse an empty file" (isRight $ parseWithCtx nullctx JournalReader.journalFile "") |     assertBool "journalFile should parse an empty file" (isRight $ parseWithCtx nullctx JournalReader.journalFile "") | ||||||
|  | |||||||
							
								
								
									
										584
									
								
								hledger-lib/Hledger/Read/CsvReader.hs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										584
									
								
								hledger-lib/Hledger/Read/CsvReader.hs
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,584 @@ | |||||||
|  | {-| | ||||||
|  | 
 | ||||||
|  | A reader for CSV files. Uses optional extra rules to help interpret the | ||||||
|  | data, like the convert command. | ||||||
|  | 
 | ||||||
|  | -} | ||||||
|  | 
 | ||||||
|  | module Hledger.Read.CsvReader ( | ||||||
|  |        reader, | ||||||
|  |        tests_Hledger_Read_CsvReader | ||||||
|  | ) | ||||||
|  | where | ||||||
|  | import Control.Monad | ||||||
|  | import Control.Monad.Error | ||||||
|  | import Test.HUnit | ||||||
|  | -- import Text.ParserCombinators.Parsec hiding (parse) | ||||||
|  | import Data.List | ||||||
|  | import Data.Maybe | ||||||
|  | import Data.Ord | ||||||
|  | import Data.Time.Format (parseTime) | ||||||
|  | import Safe | ||||||
|  | import System.Directory (doesFileExist) | ||||||
|  | import System.Exit (exitFailure) | ||||||
|  | import System.FilePath (takeBaseName, replaceExtension) | ||||||
|  | import System.IO (stderr) | ||||||
|  | import System.Locale (defaultTimeLocale) | ||||||
|  | import Test.HUnit | ||||||
|  | import Text.CSV (parseCSV, parseCSVFromFile, CSV) | ||||||
|  | import Text.ParserCombinators.Parsec | ||||||
|  | import Text.Printf (hPrintf) | ||||||
|  | 
 | ||||||
|  | import Hledger.Data | ||||||
|  | import Hledger.Read.Utils | ||||||
|  | import Prelude hiding (getContents) | ||||||
|  | import Hledger.Utils.UTF8 (getContents) | ||||||
|  | import Hledger.Utils | ||||||
|  | import Hledger.Data.FormatStrings as FormatStrings | ||||||
|  | import Hledger.Read.JournalReader (ledgeraccountname, someamount) | ||||||
|  | -- import Hledger.Read.JournalReader (ledgerDirective, ledgerHistoricalPrice, | ||||||
|  | --                                    ledgerDefaultYear, emptyLine, ledgerdatetime) | ||||||
|  | 
 | ||||||
|  | reader :: Reader | ||||||
|  | reader = Reader format detect parse_ | ||||||
|  | 
 | ||||||
|  | format :: String | ||||||
|  | format = "csv" | ||||||
|  | 
 | ||||||
|  | -- | Does the given file path and data look like CSV ? | ||||||
|  | detect :: FilePath -> String -> Bool | ||||||
|  | detect f _ = fileSuffix f == format | ||||||
|  | 
 | ||||||
|  | -- | Parse and post-process a "Journal" from CSV data, or give an error. | ||||||
|  | -- XXX currently ignores the string and reads from the file path | ||||||
|  | parse_ :: FilePath -> String -> ErrorT String IO Journal | ||||||
|  | parse_ f s = do | ||||||
|  |   r <- liftIO $ journalFromCsv f s | ||||||
|  |   case r of Left e -> throwError e | ||||||
|  |             Right j -> return j | ||||||
|  | 
 | ||||||
|  | -- csvFile :: GenParser Char JournalContext (JournalUpdate,JournalContext) | ||||||
|  | -- csvFile = do items <- many timelogItem | ||||||
|  | --              eof | ||||||
|  | --              ctx <- getState | ||||||
|  | --              return (liftM (foldr (.) id) $ sequence items, ctx) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- XXX copied from Convert.hs | ||||||
|  | 
 | ||||||
|  | {- | | ||||||
|  | A set of data definitions and account-matching patterns sufficient to | ||||||
|  | convert a particular CSV data file into meaningful journal transactions. See above. | ||||||
|  | -} | ||||||
|  | data CsvRules = CsvRules { | ||||||
|  |       dateField :: Maybe FieldPosition, | ||||||
|  |       dateFormat :: Maybe String, | ||||||
|  |       statusField :: Maybe FieldPosition, | ||||||
|  |       codeField :: Maybe FieldPosition, | ||||||
|  |       descriptionField :: [FormatString], | ||||||
|  |       amountField :: Maybe FieldPosition, | ||||||
|  |       amountInField :: Maybe FieldPosition, | ||||||
|  |       amountOutField :: Maybe FieldPosition, | ||||||
|  |       currencyField :: Maybe FieldPosition, | ||||||
|  |       baseCurrency :: Maybe String, | ||||||
|  |       accountField :: Maybe FieldPosition, | ||||||
|  |       account2Field :: Maybe FieldPosition, | ||||||
|  |       effectiveDateField :: Maybe FieldPosition, | ||||||
|  |       baseAccount :: AccountName, | ||||||
|  |       accountRules :: [AccountRule] | ||||||
|  | } deriving (Show, Eq) | ||||||
|  | 
 | ||||||
|  | nullrules = CsvRules { | ||||||
|  |       dateField=Nothing, | ||||||
|  |       dateFormat=Nothing, | ||||||
|  |       statusField=Nothing, | ||||||
|  |       codeField=Nothing, | ||||||
|  |       descriptionField=[], | ||||||
|  |       amountField=Nothing, | ||||||
|  |       amountInField=Nothing, | ||||||
|  |       amountOutField=Nothing, | ||||||
|  |       currencyField=Nothing, | ||||||
|  |       baseCurrency=Nothing, | ||||||
|  |       accountField=Nothing, | ||||||
|  |       account2Field=Nothing, | ||||||
|  |       effectiveDateField=Nothing, | ||||||
|  |       baseAccount="unknown", | ||||||
|  |       accountRules=[] | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | type FieldPosition = Int | ||||||
|  | 
 | ||||||
|  | type AccountRule = ( | ||||||
|  |    [(String, Maybe String)] -- list of regex match patterns with optional replacements | ||||||
|  |   ,AccountName              -- account name to use for a transaction matching this rule | ||||||
|  |   ) | ||||||
|  | 
 | ||||||
|  | type CsvRecord = [String] | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- | Read the CSV file named as an argument and print equivalent journal transactions, | ||||||
|  | -- using/creating a .rules file. | ||||||
|  | journalFromCsv :: FilePath -> String -> IO (Either String Journal) | ||||||
|  | journalFromCsv csvfile content = do | ||||||
|  |   let usingStdin = csvfile == "-" | ||||||
|  |       -- rulesFileSpecified = isJust $ rules_file_ opts | ||||||
|  |       rulesfile = rulesFileFor csvfile | ||||||
|  |   -- when (usingStdin && (not rulesFileSpecified)) $ error' "please use --rules-file to specify a rules file when converting stdin" | ||||||
|  |   csvparse <- parseCsv csvfile content | ||||||
|  |   let records = case csvparse of | ||||||
|  |                   Left e -> error' $ show e | ||||||
|  |                   Right rs -> filter (/= [""]) rs | ||||||
|  |   exists <- doesFileExist rulesfile | ||||||
|  |   if (not exists) then do | ||||||
|  |                   hPrintf stderr "creating conversion rules file %s, edit this file for better results\n" rulesfile | ||||||
|  |                   writeFile rulesfile initialRulesFileContent | ||||||
|  |    else | ||||||
|  |       hPrintf stderr "using conversion rules file %s\n" rulesfile | ||||||
|  |   rules <- liftM (either (error'.show) id) $ parseCsvRulesFile rulesfile | ||||||
|  |   let invalid = validateRules rules | ||||||
|  |   -- when (debug_ opts) $ hPrintf stderr "rules: %s\n" (show rules) | ||||||
|  |   when (isJust invalid) $ error (fromJust invalid) | ||||||
|  |   let requiredfields = max 2 (maxFieldIndex rules + 1) | ||||||
|  |       badrecords = take 1 $ filter ((< requiredfields).length) records | ||||||
|  |   if null badrecords | ||||||
|  |    then do | ||||||
|  |      return $ Right nulljournal{jtxns=sortBy (comparing tdate) $ map (transactionFromCsvRecord rules) records} | ||||||
|  |    else | ||||||
|  |      return $ Left (unlines [ | ||||||
|  |                       "Warning, at least one CSV record does not contain a field referenced by the" | ||||||
|  |                      ,"conversion rules file, or has less than two fields. Are you converting a" | ||||||
|  |                      ,"valid CSV file ? First bad record:" | ||||||
|  |                      , show $ head badrecords | ||||||
|  |                      ]) | ||||||
|  | 
 | ||||||
|  | parseCsv :: FilePath -> String -> IO (Either ParseError CSV) | ||||||
|  | parseCsv path content = | ||||||
|  |   case path of | ||||||
|  |     "-" -> liftM (parseCSV "(stdin)") getContents | ||||||
|  |     _   -> return $ parseCSV path content | ||||||
|  | 
 | ||||||
|  | -- | The highest (0-based) field index referenced in the field | ||||||
|  | -- definitions, or -1 if no fields are defined. | ||||||
|  | maxFieldIndex :: CsvRules -> Int | ||||||
|  | maxFieldIndex r = maximumDef (-1) $ catMaybes [ | ||||||
|  |                    dateField r | ||||||
|  |                   ,statusField r | ||||||
|  |                   ,codeField r | ||||||
|  |                   ,amountField r | ||||||
|  |                   ,amountInField r | ||||||
|  |                   ,amountOutField r | ||||||
|  |                   ,currencyField r | ||||||
|  |                   ,accountField r | ||||||
|  |                   ,account2Field r | ||||||
|  |                   ,effectiveDateField r | ||||||
|  |                   ] | ||||||
|  | 
 | ||||||
|  | -- rulesFileFor :: CliOpts -> FilePath -> FilePath | ||||||
|  | -- rulesFileFor CliOpts{rules_file_=Just f} _ = f | ||||||
|  | -- rulesFileFor CliOpts{rules_file_=Nothing} csvfile = replaceExtension csvfile ".rules" | ||||||
|  | rulesFileFor :: FilePath -> FilePath | ||||||
|  | rulesFileFor = flip replaceExtension ".rules" | ||||||
|  | 
 | ||||||
|  | initialRulesFileContent :: String | ||||||
|  | initialRulesFileContent = let prognameandversion = "hledger" in | ||||||
|  |     "# csv conversion rules file generated by " ++ prognameandversion ++ "\n" ++ | ||||||
|  |     "# Add rules to this file for more accurate conversion, see\n"++ | ||||||
|  |     "# http://hledger.org/MANUAL.html#convert\n" ++ | ||||||
|  |     "\n" ++ | ||||||
|  |     "base-account assets:bank:checking\n" ++ | ||||||
|  |     "date-field 0\n" ++ | ||||||
|  |     "description-field 4\n" ++ | ||||||
|  |     "amount-field 1\n" ++ | ||||||
|  |     "base-currency $\n" ++ | ||||||
|  |     "\n" ++ | ||||||
|  |     "# account-assigning rules\n" ++ | ||||||
|  |     "\n" ++ | ||||||
|  |     "SPECTRUM\n" ++ | ||||||
|  |     "expenses:health:gym\n" ++ | ||||||
|  |     "\n" ++ | ||||||
|  |     "ITUNES\n" ++ | ||||||
|  |     "BLKBSTR=BLOCKBUSTER\n" ++ | ||||||
|  |     "expenses:entertainment\n" ++ | ||||||
|  |     "\n" ++ | ||||||
|  |     "(TO|FROM) SAVINGS\n" ++ | ||||||
|  |     "assets:bank:savings\n" | ||||||
|  | 
 | ||||||
|  | validateRules :: CsvRules -> Maybe String | ||||||
|  | validateRules rules = let | ||||||
|  |     hasAmount = isJust $ amountField rules | ||||||
|  |     hasIn = isJust $ amountInField rules | ||||||
|  |     hasOut = isJust $ amountOutField rules | ||||||
|  |   in case (hasAmount, hasIn, hasOut) of | ||||||
|  |     (True, True, _) -> Just "Don't specify amount-in-field when specifying amount-field" | ||||||
|  |     (True, _, True) -> Just "Don't specify amount-out-field when specifying amount-field" | ||||||
|  |     (_, False, True) -> Just "Please specify amount-in-field when specifying amount-out-field" | ||||||
|  |     (_, True, False) -> Just "Please specify amount-out-field when specifying amount-in-field" | ||||||
|  |     (False, False, False) -> Just "Please specify either amount-field, or amount-in-field and amount-out-field" | ||||||
|  |     _ -> Nothing | ||||||
|  | 
 | ||||||
|  | -- rules file parser | ||||||
|  | 
 | ||||||
|  | parseCsvRulesFile :: FilePath -> IO (Either ParseError CsvRules) | ||||||
|  | parseCsvRulesFile f = do | ||||||
|  |   s <- readFile f | ||||||
|  |   return $ parseCsvRules f s | ||||||
|  | 
 | ||||||
|  | parseCsvRules :: FilePath -> String -> Either ParseError CsvRules | ||||||
|  | parseCsvRules rulesfile s = runParser csvrulesfile nullrules{baseAccount=takeBaseName rulesfile} rulesfile s | ||||||
|  | 
 | ||||||
|  | csvrulesfile :: GenParser Char CsvRules CsvRules | ||||||
|  | csvrulesfile = do | ||||||
|  |   many blankorcommentline | ||||||
|  |   many definitions | ||||||
|  |   r <- getState | ||||||
|  |   ars <- many accountrule | ||||||
|  |   many blankorcommentline | ||||||
|  |   eof | ||||||
|  |   return r{accountRules=ars} | ||||||
|  | 
 | ||||||
|  | definitions :: GenParser Char CsvRules () | ||||||
|  | definitions = do | ||||||
|  |   choice' [ | ||||||
|  |     datefield | ||||||
|  |    ,dateformat | ||||||
|  |    ,statusfield | ||||||
|  |    ,codefield | ||||||
|  |    ,descriptionfield | ||||||
|  |    ,amountfield | ||||||
|  |    ,amountinfield | ||||||
|  |    ,amountoutfield | ||||||
|  |    ,currencyfield | ||||||
|  |    ,accountfield | ||||||
|  |    ,account2field | ||||||
|  |    ,effectivedatefield | ||||||
|  |    ,basecurrency | ||||||
|  |    ,baseaccount | ||||||
|  |    ,commentline | ||||||
|  |    ] <?> "definition" | ||||||
|  |   return () | ||||||
|  | 
 | ||||||
|  | datefield = do | ||||||
|  |   string "date-field" | ||||||
|  |   many1 spacenonewline | ||||||
|  |   v <- restofline | ||||||
|  |   updateState (\r -> r{dateField=readMay v}) | ||||||
|  | 
 | ||||||
|  | effectivedatefield = do | ||||||
|  |   string "effective-date-field" | ||||||
|  |   many1 spacenonewline | ||||||
|  |   v <- restofline | ||||||
|  |   updateState (\r -> r{effectiveDateField=readMay v}) | ||||||
|  | 
 | ||||||
|  | dateformat = do | ||||||
|  |   string "date-format" | ||||||
|  |   many1 spacenonewline | ||||||
|  |   v <- restofline | ||||||
|  |   updateState (\r -> r{dateFormat=Just v}) | ||||||
|  | 
 | ||||||
|  | codefield = do | ||||||
|  |   string "code-field" | ||||||
|  |   many1 spacenonewline | ||||||
|  |   v <- restofline | ||||||
|  |   updateState (\r -> r{codeField=readMay v}) | ||||||
|  | 
 | ||||||
|  | statusfield = do | ||||||
|  |   string "status-field" | ||||||
|  |   many1 spacenonewline | ||||||
|  |   v <- restofline | ||||||
|  |   updateState (\r -> r{statusField=readMay v}) | ||||||
|  | 
 | ||||||
|  | descriptionFieldValue :: GenParser Char st [FormatString] | ||||||
|  | descriptionFieldValue = do | ||||||
|  | --      try (fieldNo <* spacenonewline) | ||||||
|  |       try fieldNo | ||||||
|  |   <|> formatStrings | ||||||
|  |   where | ||||||
|  |     fieldNo = many1 digit >>= \x -> return [FormatField False Nothing Nothing $ FieldNo $ read x] | ||||||
|  | 
 | ||||||
|  | descriptionfield = do | ||||||
|  |   string "description-field" | ||||||
|  |   many1 spacenonewline | ||||||
|  |   formatS <- descriptionFieldValue | ||||||
|  |   restofline | ||||||
|  |   updateState (\x -> x{descriptionField=formatS}) | ||||||
|  | 
 | ||||||
|  | amountfield = do | ||||||
|  |   string "amount-field" | ||||||
|  |   many1 spacenonewline | ||||||
|  |   v <- restofline | ||||||
|  |   x <- updateState (\r -> r{amountField=readMay v}) | ||||||
|  |   return x | ||||||
|  | 
 | ||||||
|  | amountinfield = do | ||||||
|  |   choice [string "amount-in-field", string "in-field"] | ||||||
|  |   many1 spacenonewline | ||||||
|  |   v <- restofline | ||||||
|  |   updateState (\r -> r{amountInField=readMay v}) | ||||||
|  | 
 | ||||||
|  | amountoutfield = do | ||||||
|  |   choice [string "amount-out-field", string "out-field"] | ||||||
|  |   many1 spacenonewline | ||||||
|  |   v <- restofline | ||||||
|  |   updateState (\r -> r{amountOutField=readMay v}) | ||||||
|  | 
 | ||||||
|  | currencyfield = do | ||||||
|  |   string "currency-field" | ||||||
|  |   many1 spacenonewline | ||||||
|  |   v <- restofline | ||||||
|  |   updateState (\r -> r{currencyField=readMay v}) | ||||||
|  | 
 | ||||||
|  | accountfield = do | ||||||
|  |   string "account-field" | ||||||
|  |   many1 spacenonewline | ||||||
|  |   v <- restofline | ||||||
|  |   updateState (\r -> r{accountField=readMay v}) | ||||||
|  | 
 | ||||||
|  | account2field = do | ||||||
|  |   string "account2-field" | ||||||
|  |   many1 spacenonewline | ||||||
|  |   v <- restofline | ||||||
|  |   updateState (\r -> r{account2Field=readMay v}) | ||||||
|  | 
 | ||||||
|  | basecurrency = do | ||||||
|  |   choice [string "base-currency", string "currency"] | ||||||
|  |   many1 spacenonewline | ||||||
|  |   v <- restofline | ||||||
|  |   updateState (\r -> r{baseCurrency=Just v}) | ||||||
|  | 
 | ||||||
|  | baseaccount = do | ||||||
|  |   string "base-account" | ||||||
|  |   many1 spacenonewline | ||||||
|  |   v <- ledgeraccountname | ||||||
|  |   optional newline | ||||||
|  |   updateState (\r -> r{baseAccount=v}) | ||||||
|  | 
 | ||||||
|  | accountrule :: GenParser Char CsvRules AccountRule | ||||||
|  | accountrule = do | ||||||
|  |   many blankorcommentline | ||||||
|  |   pats <- many1 matchreplacepattern | ||||||
|  |   guard $ length pats >= 2 | ||||||
|  |   let pats' = init pats | ||||||
|  |       acct = either (fail.show) id $ runParser ledgeraccountname () "" $ fst $ last pats | ||||||
|  |   many blankorcommentline | ||||||
|  |   return (pats',acct) | ||||||
|  |  <?> "account rule" | ||||||
|  | 
 | ||||||
|  | blanklines = many1 blankline | ||||||
|  | 
 | ||||||
|  | blankline = many spacenonewline >> newline >> return () <?> "blank line" | ||||||
|  | 
 | ||||||
|  | commentchar = oneOf ";#" | ||||||
|  | 
 | ||||||
|  | commentline = many spacenonewline >> commentchar >> restofline >> return () <?> "comment line" | ||||||
|  | 
 | ||||||
|  | blankorcommentline = choice' [blankline, commentline] | ||||||
|  | 
 | ||||||
|  | matchreplacepattern = do | ||||||
|  |   notFollowedBy commentchar | ||||||
|  |   matchpat <- many1 (noneOf "=\n") | ||||||
|  |   replpat <- optionMaybe $ do {char '='; many $ noneOf "\n"} | ||||||
|  |   newline | ||||||
|  |   return (matchpat,replpat) | ||||||
|  | 
 | ||||||
|  | -- csv record conversion | ||||||
|  | formatD :: CsvRecord -> Bool -> Maybe Int -> Maybe Int -> Field -> String | ||||||
|  | formatD record leftJustified min max f = case f of  | ||||||
|  |   FieldNo n       -> maybe "" show $ atMay record n | ||||||
|  |   -- Some of these might in theory in read from fields | ||||||
|  |   FormatStrings.Account  -> "" | ||||||
|  |   DepthSpacer     -> "" | ||||||
|  |   Total           -> "" | ||||||
|  |   DefaultDate     -> "" | ||||||
|  |   Description     -> "" | ||||||
|  |  where | ||||||
|  |    show = formatValue leftJustified min max | ||||||
|  | 
 | ||||||
|  | formatDescription :: CsvRecord -> [FormatString] -> String | ||||||
|  | formatDescription _ [] = "" | ||||||
|  | formatDescription record (f:fs) = s ++ (formatDescription record fs) | ||||||
|  |   where s = case f of | ||||||
|  |                 FormatLiteral l -> l | ||||||
|  |                 FormatField leftJustified min max field  -> formatD record leftJustified min max field | ||||||
|  | 
 | ||||||
|  | transactionFromCsvRecord :: CsvRules -> CsvRecord -> Transaction | ||||||
|  | transactionFromCsvRecord rules fields = | ||||||
|  |   let  | ||||||
|  |       date = parsedate $ normaliseDate (dateFormat rules) $ maybe "1900/1/1" (atDef "" fields) (dateField rules) | ||||||
|  |       effectivedate = do idx <- effectiveDateField rules | ||||||
|  |                          return $ parsedate $ normaliseDate (dateFormat rules) $ (atDef "" fields) idx | ||||||
|  |       status = maybe False (null . strip . (atDef "" fields)) (statusField rules) | ||||||
|  |       code = maybe "" (atDef "" fields) (codeField rules) | ||||||
|  |       desc = formatDescription fields (descriptionField rules) | ||||||
|  |       comment = "" | ||||||
|  |       precomment = "" | ||||||
|  |       baseacc = maybe (baseAccount rules) (atDef "" fields) (accountField rules) | ||||||
|  |       amountstr = getAmount rules fields | ||||||
|  |       amountstr' = strnegate amountstr where strnegate ('-':s) = s | ||||||
|  |                                              strnegate s = '-':s | ||||||
|  |       currency = maybe (fromMaybe "" $ baseCurrency rules) (atDef "" fields) (currencyField rules) | ||||||
|  |       amountstr'' = currency ++ amountstr' | ||||||
|  |       amountparse = runParser someamount nullctx "" amountstr'' | ||||||
|  |       amount = either (const nullmixedamt) id amountparse | ||||||
|  |       -- Using costOfMixedAmount here to allow complex costs like "10 GBP @@ 15 USD". | ||||||
|  |       -- Aim is to have "10 GBP @@ 15 USD" applied to account "acct", but have "-15USD" applied to "baseacct" | ||||||
|  |       baseamount = costOfMixedAmount amount | ||||||
|  |       unknownacct | (readDef 0 amountstr' :: Double) < 0 = "income:unknown" | ||||||
|  |                   | otherwise = "expenses:unknown" | ||||||
|  |       (acct',newdesc) = identify (accountRules rules) unknownacct desc | ||||||
|  |       acct = maybe acct' (atDef "" fields) (account2Field rules) | ||||||
|  |       t = Transaction { | ||||||
|  |               tdate=date, | ||||||
|  |               teffectivedate=effectivedate, | ||||||
|  |               tstatus=status, | ||||||
|  |               tcode=code, | ||||||
|  |               tdescription=newdesc, | ||||||
|  |               tcomment=comment, | ||||||
|  |               tpreceding_comment_lines=precomment, | ||||||
|  |               tmetadata=[], | ||||||
|  |               tpostings=[ | ||||||
|  |                    Posting { | ||||||
|  |                      pstatus=False, | ||||||
|  |                      paccount=acct, | ||||||
|  |                      pamount=amount, | ||||||
|  |                      pcomment="", | ||||||
|  |                      ptype=RegularPosting, | ||||||
|  |                      pmetadata=[], | ||||||
|  |                      ptransaction=Just t | ||||||
|  |                    }, | ||||||
|  |                    Posting { | ||||||
|  |                      pstatus=False, | ||||||
|  |                      paccount=baseacc, | ||||||
|  |                      pamount=(-baseamount), | ||||||
|  |                      pcomment="", | ||||||
|  |                      ptype=RegularPosting, | ||||||
|  |                      pmetadata=[], | ||||||
|  |                      ptransaction=Just t | ||||||
|  |                    } | ||||||
|  |                   ] | ||||||
|  |             } | ||||||
|  |   in t | ||||||
|  | 
 | ||||||
|  | -- | Convert some date string with unknown format to YYYY/MM/DD. | ||||||
|  | normaliseDate :: Maybe String -- ^ User-supplied date format: this should be tried in preference to all others | ||||||
|  |               -> String -> String | ||||||
|  | normaliseDate mb_user_format s = | ||||||
|  |     let parsewith = flip (parseTime defaultTimeLocale) s in | ||||||
|  |     maybe (error' $ "could not parse \""++s++"\" as a date, consider adding a date-format directive or upgrading") | ||||||
|  |           showDate $ | ||||||
|  |           firstJust $ (map parsewith $ | ||||||
|  |                        maybe [] (:[]) mb_user_format | ||||||
|  |                        -- the - modifier requires time-1.2.0.5, released | ||||||
|  |                        -- in 2011/5, so for now we emulate it for wider | ||||||
|  |                        -- compatibility.  time < 1.2.0.5 also has a buggy | ||||||
|  |                        -- %y which we don't do anything about. | ||||||
|  |                        -- ++ [ | ||||||
|  |                        -- "%Y/%m/%d" | ||||||
|  |                        -- ,"%Y/%-m/%-d" | ||||||
|  |                        -- ,"%Y-%m-%d" | ||||||
|  |                        -- ,"%Y-%-m-%-d" | ||||||
|  |                        -- ,"%m/%d/%Y" | ||||||
|  |                        -- ,"%-m/%-d/%Y" | ||||||
|  |                        -- ,"%m-%d-%Y" | ||||||
|  |                        -- ,"%-m-%-d-%Y" | ||||||
|  |                        -- ] | ||||||
|  |                       ) | ||||||
|  |                       ++ [ | ||||||
|  |                        parseTime defaultTimeLocale "%Y/%m/%e" s | ||||||
|  |                       ,parseTime defaultTimeLocale "%Y-%m-%e" s | ||||||
|  |                       ,parseTime defaultTimeLocale "%m/%e/%Y" s | ||||||
|  |                       ,parseTime defaultTimeLocale "%m-%e-%Y" s | ||||||
|  |                       ,parseTime defaultTimeLocale "%Y/%m/%e" (take 5 s ++ "0" ++ drop 5 s) | ||||||
|  |                       ,parseTime defaultTimeLocale "%Y-%m-%e" (take 5 s ++ "0" ++ drop 5 s) | ||||||
|  |                       ,parseTime defaultTimeLocale "%m/%e/%Y" ('0':s) | ||||||
|  |                       ,parseTime defaultTimeLocale "%m-%e-%Y" ('0':s) | ||||||
|  |                       ] | ||||||
|  | 
 | ||||||
|  | -- | Apply account matching rules to a transaction description to obtain | ||||||
|  | -- the most appropriate account and a new description. | ||||||
|  | identify :: [AccountRule] -> String -> String -> (String,String) | ||||||
|  | identify rules defacct desc | null matchingrules = (defacct,desc) | ||||||
|  |                             | otherwise = (acct,newdesc) | ||||||
|  |     where | ||||||
|  |       matchingrules = filter ismatch rules :: [AccountRule] | ||||||
|  |           where ismatch = any ((`regexMatchesCI` desc) . fst) . fst | ||||||
|  |       (prs,acct) = head matchingrules | ||||||
|  |       p_ms_r = filter (\(_,m,_) -> m) $ map (\(p,r) -> (p, p `regexMatchesCI` desc, r)) prs | ||||||
|  |       (p,_,r) = head p_ms_r | ||||||
|  |       newdesc = case r of Just repl -> regexReplaceCI p repl desc | ||||||
|  |                           Nothing   -> desc | ||||||
|  | 
 | ||||||
|  | caseinsensitive = ("(?i)"++) | ||||||
|  | 
 | ||||||
|  | getAmount :: CsvRules -> CsvRecord -> String | ||||||
|  | getAmount rules fields = case amountField rules of | ||||||
|  |   Just f  -> maybe "" (atDef "" fields) $ Just f | ||||||
|  |   Nothing -> | ||||||
|  |     case (i, o) of | ||||||
|  |       (x, "") -> x | ||||||
|  |       ("", x) -> "-"++x | ||||||
|  |       p -> error' $ "using amount-in-field and amount-out-field, found a value in both fields: "++show p | ||||||
|  |     where | ||||||
|  |       i = maybe "" (atDef "" fields) (amountInField rules) | ||||||
|  |       o = maybe "" (atDef "" fields) (amountOutField rules) | ||||||
|  | 
 | ||||||
|  | tests_Hledger_Read_CsvReader = TestList (test_parser ++ test_description_parsing) | ||||||
|  | 
 | ||||||
|  | test_description_parsing = [ | ||||||
|  |       "description-field 1" ~: assertParseDescription "description-field 1\n" [FormatField False Nothing Nothing (FieldNo 1)] | ||||||
|  |     , "description-field 1 " ~: assertParseDescription "description-field 1 \n" [FormatField False Nothing Nothing (FieldNo 1)] | ||||||
|  |     , "description-field %(1)" ~: assertParseDescription "description-field %(1)\n" [FormatField False Nothing Nothing (FieldNo 1)] | ||||||
|  |     , "description-field %(1)/$(2)" ~: assertParseDescription "description-field %(1)/%(2)\n" [ | ||||||
|  |           FormatField False Nothing Nothing (FieldNo 1) | ||||||
|  |         , FormatLiteral "/" | ||||||
|  |         , FormatField False Nothing Nothing (FieldNo 2) | ||||||
|  |         ] | ||||||
|  |     ] | ||||||
|  |   where | ||||||
|  |     assertParseDescription string expected = do assertParseEqual (parseDescription string) (nullrules {descriptionField = expected}) | ||||||
|  |     parseDescription :: String -> Either ParseError CsvRules | ||||||
|  |     parseDescription x = runParser descriptionfieldWrapper nullrules "(unknown)" x | ||||||
|  |     descriptionfieldWrapper :: GenParser Char CsvRules CsvRules | ||||||
|  |     descriptionfieldWrapper = do | ||||||
|  |       descriptionfield | ||||||
|  |       r <- getState | ||||||
|  |       return r | ||||||
|  | 
 | ||||||
|  | test_parser =  [ | ||||||
|  | 
 | ||||||
|  |    "convert rules parsing: empty file" ~: do | ||||||
|  |      -- let assertMixedAmountParse parseresult mixedamount = | ||||||
|  |      --         (either (const "parse error") showMixedAmountDebug parseresult) ~?= (showMixedAmountDebug mixedamount) | ||||||
|  |     assertParseEqual (parseCsvRules "unknown" "") nullrules | ||||||
|  | 
 | ||||||
|  |   ,"convert rules parsing: accountrule" ~: do | ||||||
|  |      assertParseEqual (parseWithCtx nullrules accountrule "A\na\n") -- leading blank line required | ||||||
|  |                  ([("A",Nothing)], "a") | ||||||
|  | 
 | ||||||
|  |   ,"convert rules parsing: trailing comments" ~: do | ||||||
|  |      assertParse (parseWithCtx nullrules csvrulesfile "A\na\n# \n#\n") | ||||||
|  | 
 | ||||||
|  |   ,"convert rules parsing: trailing blank lines" ~: do | ||||||
|  |      assertParse (parseWithCtx nullrules csvrulesfile "A\na\n\n  \n") | ||||||
|  | 
 | ||||||
|  |   -- not supported | ||||||
|  |   -- ,"convert rules parsing: no final newline" ~: do | ||||||
|  |   --    assertParse (parseWithCtx nullrules csvrulesfile "A\na") | ||||||
|  |   --    assertParse (parseWithCtx nullrules csvrulesfile "A\na\n# \n#") | ||||||
|  |   --    assertParse (parseWithCtx nullrules csvrulesfile "A\na\n\n  ") | ||||||
|  | 
 | ||||||
|  |                  -- (nullrules{ | ||||||
|  |                  --   -- dateField=Maybe FieldPosition, | ||||||
|  |                  --   -- statusField=Maybe FieldPosition, | ||||||
|  |                  --   -- codeField=Maybe FieldPosition, | ||||||
|  |                  --   -- descriptionField=Maybe FieldPosition, | ||||||
|  |                  --   -- amountField=Maybe FieldPosition, | ||||||
|  |                  --   -- currencyField=Maybe FieldPosition, | ||||||
|  |                  --   -- baseCurrency=Maybe String, | ||||||
|  |                  --   -- baseAccount=AccountName, | ||||||
|  |                  --   accountRules=[ | ||||||
|  |                  --        ([("A",Nothing)], "a") | ||||||
|  |                  --       ] | ||||||
|  |                  --  }) | ||||||
|  | 
 | ||||||
|  |   ] | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user