imp: csv: new timezone rule; convert zoned date-times to local dates (#1936)
Previously, CSV date-times in a different time zone from yours (with or without explicit time zones in the CSV) could give off-by-one dates, because the CSV time zone was ignored. Now: (1) you can use the `timezone` rule to indicate which other time zone a CSV is implicitly using; (2) CSV date-times with a time zone - whether declared by rule or parsed with %Z - are localised to the system time zone (or to another one set with the TZ environment variable).
This commit is contained in:
parent
7d1b1cadce
commit
3b24d9465b
@ -56,8 +56,8 @@ import qualified Data.Text.Encoding as T
|
|||||||
import qualified Data.Text.IO as T
|
import qualified Data.Text.IO as T
|
||||||
import qualified Data.Text.Lazy as TL
|
import qualified Data.Text.Lazy as TL
|
||||||
import qualified Data.Text.Lazy.Builder as TB
|
import qualified Data.Text.Lazy.Builder as TB
|
||||||
import Data.Time.Calendar (Day)
|
import Data.Time ( Day, TimeZone, UTCTime, LocalTime, ZonedTime(ZonedTime),
|
||||||
import Data.Time.Format (parseTimeM, defaultTimeLocale)
|
defaultTimeLocale, getCurrentTimeZone, localDay, parseTimeM, utcToLocalTime, localTimeToUTC, zonedTimeToUTC)
|
||||||
import Safe (atMay, headMay, lastMay, readMay)
|
import Safe (atMay, headMay, lastMay, readMay)
|
||||||
import System.Directory (doesFileExist)
|
import System.Directory (doesFileExist)
|
||||||
import System.FilePath ((</>), takeDirectory, takeExtension, takeFileName)
|
import System.FilePath ((</>), takeDirectory, takeExtension, takeFileName)
|
||||||
@ -460,6 +460,7 @@ directives =
|
|||||||
-- ,"default-account"
|
-- ,"default-account"
|
||||||
-- ,"default-currency"
|
-- ,"default-currency"
|
||||||
,"skip"
|
,"skip"
|
||||||
|
,"timezone"
|
||||||
,"newest-first"
|
,"newest-first"
|
||||||
, "balance-type"
|
, "balance-type"
|
||||||
]
|
]
|
||||||
@ -703,6 +704,13 @@ readJournalFromCsv mrulesfile csvfile csvdata = do
|
|||||||
Just "" -> return 1
|
Just "" -> return 1
|
||||||
Just s -> maybe (throwError $ "could not parse skip value: " ++ show s) return . readMay $ T.unpack s
|
Just s -> maybe (throwError $ "could not parse skip value: " ++ show s) return . readMay $ T.unpack s
|
||||||
|
|
||||||
|
mtzin <- case getDirective "timezone" rules of
|
||||||
|
Nothing -> return Nothing
|
||||||
|
Just s ->
|
||||||
|
maybe (throwError $ "could not parse time zone: " ++ T.unpack s) (return.Just) $
|
||||||
|
parseTimeM False defaultTimeLocale "%Z" $ T.unpack s
|
||||||
|
tzout <- liftIO getCurrentTimeZone
|
||||||
|
|
||||||
-- parse csv
|
-- parse csv
|
||||||
let
|
let
|
||||||
-- parsec seems to fail if you pass it "-" here TODO: try again with megaparsec
|
-- parsec seems to fail if you pass it "-" here TODO: try again with megaparsec
|
||||||
@ -733,9 +741,14 @@ readJournalFromCsv mrulesfile csvfile csvdata = do
|
|||||||
line' = (mkPos . (+1) . unPos) line
|
line' = (mkPos . (+1) . unPos) line
|
||||||
pos' = SourcePos name line' col
|
pos' = SourcePos name line' col
|
||||||
in
|
in
|
||||||
(pos', transactionFromCsvRecord pos rules r)
|
(pos', transactionFromCsvRecord timesarezoned mtzin tzout pos rules r)
|
||||||
)
|
)
|
||||||
(initialPos parsecfilename) records
|
(initialPos parsecfilename) records
|
||||||
|
where
|
||||||
|
timesarezoned =
|
||||||
|
case csvRule rules "date-format" of
|
||||||
|
Just f | any (`T.isInfixOf` f) ["%Z","%z","%EZ","%Ez"] -> True
|
||||||
|
_ -> False
|
||||||
|
|
||||||
-- Ensure transactions are ordered chronologically.
|
-- Ensure transactions are ordered chronologically.
|
||||||
-- First, if the CSV records seem to be most-recent-first (because
|
-- First, if the CSV records seem to be most-recent-first (because
|
||||||
@ -856,8 +869,8 @@ hledgerField = getEffectiveAssignment
|
|||||||
hledgerFieldValue :: CsvRules -> CsvRecord -> HledgerFieldName -> Maybe Text
|
hledgerFieldValue :: CsvRules -> CsvRecord -> HledgerFieldName -> Maybe Text
|
||||||
hledgerFieldValue rules record = fmap (renderTemplate rules record) . hledgerField rules record
|
hledgerFieldValue rules record = fmap (renderTemplate rules record) . hledgerField rules record
|
||||||
|
|
||||||
transactionFromCsvRecord :: SourcePos -> CsvRules -> CsvRecord -> Transaction
|
transactionFromCsvRecord :: Bool -> Maybe TimeZone -> TimeZone -> SourcePos -> CsvRules -> CsvRecord -> Transaction
|
||||||
transactionFromCsvRecord sourcepos rules record = t
|
transactionFromCsvRecord timesarezoned mtzin tzout sourcepos rules record = t
|
||||||
where
|
where
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
-- 1. Define some helpers:
|
-- 1. Define some helpers:
|
||||||
@ -866,7 +879,8 @@ transactionFromCsvRecord sourcepos rules record = t
|
|||||||
-- ruleval = csvRuleValue rules record :: DirectiveName -> Maybe String
|
-- ruleval = csvRuleValue rules record :: DirectiveName -> Maybe String
|
||||||
field = hledgerField rules record :: HledgerFieldName -> Maybe FieldTemplate
|
field = hledgerField rules record :: HledgerFieldName -> Maybe FieldTemplate
|
||||||
fieldval = hledgerFieldValue rules record :: HledgerFieldName -> Maybe Text
|
fieldval = hledgerFieldValue rules record :: HledgerFieldName -> Maybe Text
|
||||||
parsedate = parseDateWithCustomOrDefaultFormats (rule "date-format")
|
mdateformat = rule "date-format"
|
||||||
|
parsedate = parseDateWithCustomOrDefaultFormats timesarezoned mtzin tzout mdateformat
|
||||||
mkdateerror datefield datevalue mdateformat' = T.unpack $ T.unlines
|
mkdateerror datefield datevalue mdateformat' = T.unpack $ T.unlines
|
||||||
["error: could not parse \""<>datevalue<>"\" as a date using date format "
|
["error: could not parse \""<>datevalue<>"\" as a date using date format "
|
||||||
<>maybe "\"YYYY/M/D\", \"YYYY-M-D\" or \"YYYY.M.D\"" (T.pack . show) mdateformat'
|
<>maybe "\"YYYY/M/D\", \"YYYY-M-D\" or \"YYYY.M.D\"" (T.pack . show) mdateformat'
|
||||||
@ -887,7 +901,6 @@ transactionFromCsvRecord sourcepos rules record = t
|
|||||||
-- field assignment rules using the CSV record's data, and parsing a bit
|
-- field assignment rules using the CSV record's data, and parsing a bit
|
||||||
-- more where needed (dates, status).
|
-- more where needed (dates, status).
|
||||||
|
|
||||||
mdateformat = rule "date-format"
|
|
||||||
date = fromMaybe "" $ fieldval "date"
|
date = fromMaybe "" $ fieldval "date"
|
||||||
-- PARTIAL:
|
-- PARTIAL:
|
||||||
date' = fromMaybe (error' $ mkdateerror "date" date mdateformat) $ parsedate date
|
date' = fromMaybe (error' $ mkdateerror "date" date mdateformat) $ parsedate date
|
||||||
@ -1320,11 +1333,45 @@ csvFieldValue rules record fieldname = do
|
|||||||
|
|
||||||
-- | Parse the date string using the specified date-format, or if unspecified
|
-- | Parse the date string using the specified date-format, or if unspecified
|
||||||
-- the "simple date" formats (YYYY/MM/DD, YYYY-MM-DD, YYYY.MM.DD, leading
|
-- the "simple date" formats (YYYY/MM/DD, YYYY-MM-DD, YYYY.MM.DD, leading
|
||||||
-- zeroes optional).
|
-- zeroes optional). If a timezone is provided, we assume the DateFormat
|
||||||
parseDateWithCustomOrDefaultFormats :: Maybe DateFormat -> Text -> Maybe Day
|
-- produces a zoned time and we localise that to the given timezone.
|
||||||
parseDateWithCustomOrDefaultFormats mformat s = asum $ map parsewith' formats
|
parseDateWithCustomOrDefaultFormats :: Bool -> Maybe TimeZone -> TimeZone -> Maybe DateFormat -> Text -> Maybe Day
|
||||||
|
parseDateWithCustomOrDefaultFormats timesarezoned mtzin tzout mformat s = localdate <$> mutctime
|
||||||
|
-- this time code can probably be simpler, I'm just happy to get out alive
|
||||||
where
|
where
|
||||||
parsewith' = flip (parseTimeM True defaultTimeLocale) (T.unpack s)
|
localdate :: UTCTime -> Day =
|
||||||
|
localDay .
|
||||||
|
dbg7 ("time in output timezone "++show tzout) .
|
||||||
|
utcToLocalTime tzout
|
||||||
|
mutctime :: Maybe UTCTime = asum $ map parseWithFormat formats
|
||||||
|
|
||||||
|
parseWithFormat :: String -> Maybe UTCTime
|
||||||
|
parseWithFormat fmt =
|
||||||
|
if timesarezoned
|
||||||
|
then
|
||||||
|
dbg7 "zoned CSV time, expressed as UTC" $
|
||||||
|
parseTimeM True defaultTimeLocale fmt $ T.unpack s :: Maybe UTCTime
|
||||||
|
else
|
||||||
|
-- parse as a local day and time; then if an input timezone is provided,
|
||||||
|
-- assume it's in that, otherwise assume it's in the output timezone;
|
||||||
|
-- then convert to UTC like the above
|
||||||
|
let
|
||||||
|
mlocaltime =
|
||||||
|
fmap (dbg7 "unzoned CSV time") $
|
||||||
|
parseTimeM True defaultTimeLocale fmt $ T.unpack s :: Maybe LocalTime
|
||||||
|
localTimeAsZonedTime tz lt = ZonedTime lt tz
|
||||||
|
in
|
||||||
|
case mtzin of
|
||||||
|
Just tzin ->
|
||||||
|
(dbg7 ("unzoned CSV time, declared as "++show tzin++ ", expressed as UTC") .
|
||||||
|
localTimeToUTC tzin)
|
||||||
|
<$> mlocaltime
|
||||||
|
Nothing ->
|
||||||
|
(dbg7 ("unzoned CSV time, treated as "++show tzout++ ", expressed as UTC") .
|
||||||
|
zonedTimeToUTC .
|
||||||
|
localTimeAsZonedTime tzout)
|
||||||
|
<$> mlocaltime
|
||||||
|
|
||||||
formats = map T.unpack $ maybe
|
formats = map T.unpack $ maybe
|
||||||
["%Y/%-m/%-d"
|
["%Y/%-m/%-d"
|
||||||
,"%Y-%-m-%-d"
|
,"%Y-%-m-%-d"
|
||||||
|
|||||||
@ -4592,8 +4592,9 @@ date-format DATEFMT
|
|||||||
```
|
```
|
||||||
This is a helper for the `date` (and `date2`) fields.
|
This is a helper for the `date` (and `date2`) fields.
|
||||||
If your CSV dates are not formatted like `YYYY-MM-DD`, `YYYY/MM/DD` or `YYYY.MM.DD`,
|
If your CSV dates are not formatted like `YYYY-MM-DD`, `YYYY/MM/DD` or `YYYY.MM.DD`,
|
||||||
you'll need to add a date-format rule describing them with a
|
you'll need to add a date-format rule describing them with a strptime-style date parsing pattern -
|
||||||
strptime date parsing pattern, which must parse the CSV date value completely.
|
see <https://hackage.haskell.org/package/time/docs/Data-Time-Format.html#v:formatTime>.
|
||||||
|
The pattern must parse the CSV date value completely.
|
||||||
Some examples:
|
Some examples:
|
||||||
``` rules
|
``` rules
|
||||||
# MM/DD/YY
|
# MM/DD/YY
|
||||||
@ -4613,14 +4614,33 @@ date-format %Y-%h-%d
|
|||||||
# Note the time and junk must be fully parsed, though only the date is used.
|
# Note the time and junk must be fully parsed, though only the date is used.
|
||||||
date-format %-m/%-d/%Y %l:%M %p some other junk
|
date-format %-m/%-d/%Y %l:%M %p some other junk
|
||||||
```
|
```
|
||||||
For the supported strptime syntax, see:\
|
|
||||||
<https://hackage.haskell.org/package/time/docs/Data-Time-Format.html#v:formatTime>
|
|
||||||
|
|
||||||
Note that although you can parse date-times which include a time zone,
|
### `timezone`
|
||||||
that time zone is ignored; it will not change the date that is parsed.
|
|
||||||
This means when reading CSV data with times not in your local time zone,
|
|
||||||
dates can be "off by one".
|
|
||||||
|
|
||||||
|
```rules
|
||||||
|
timezone TIMEZONE
|
||||||
|
```
|
||||||
|
|
||||||
|
When a CSV file contains date-times that are implicitly in some time zone
|
||||||
|
other than yours, but contain no explicit time zone information,
|
||||||
|
you can use this rule to declare the CSV's native time zone,
|
||||||
|
which helps prevent off-by-one dates.
|
||||||
|
|
||||||
|
When the CSV date-times do contain time zone information,
|
||||||
|
you don't need this rule; instead, use `%Z` in `date-format`
|
||||||
|
(or `%z`, `%EZ`, `%Ez`; see the formatTime link above).
|
||||||
|
|
||||||
|
In either of these cases, hledger will do a time-zone-aware conversion,
|
||||||
|
localising the CSV date-times to your current system time zone.
|
||||||
|
If you prefer to localise to some other time zone, eg for reproducibility,
|
||||||
|
you can (on unix at least) set the output timezone with the TZ environment variable, eg:
|
||||||
|
```shell
|
||||||
|
$ TZ=HST hledger print -f foo.csv # or TZ=HST hledger import foo.csv
|
||||||
|
```
|
||||||
|
|
||||||
|
`timezone` currently does not understand timezone names, except
|
||||||
|
"UTC", "GMT", "EST", "EDT", "CST", "CDT", "MST", "MDT", "PST", or "PDT".
|
||||||
|
For others, use numeric format: +HHMM or -HHMM.
|
||||||
|
|
||||||
### `decimal-mark`
|
### `decimal-mark`
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user