Compare commits

...

5 Commits

Author SHA1 Message Date
2d5bec9a2d
Lue media suhteessa tiedotteen polkuun 2024-04-26 10:46:13 +03:00
eddbceba67
Liitä mediatiedostot viesteihin
HTML-sähköpostit voivat linkata liitteisiin, jolloin viesti on
itsenäinen. Viestin sisäisiin kuviin linkkaaminen toimii yleensä
sähköpostiohjelmissa ulkoisia linkkejä paremmin, koska ulkoisten
linkkien seuraaminen on yksityisyysriski.
2024-04-25 14:21:49 +03:00
373e34a9e4
Lisää ReadM-monadi PandocIO-operaatioille
Tällä saadaan pandoc lukemaan tiedostot filestoren Git-tietovarannosta
sekä estettyä niiden lukeminen muualta.
2024-04-25 14:21:49 +03:00
dcf00955e9
Tuo 'Text.Pandoc' omaan 'Pandoc' nimiavaruuteen 2024-04-25 12:46:10 +03:00
20775ae1d5
Lue vain .md-tiedostot tiedotteina
Tämä on yksinkertainen ja nopea tapa estää tiedotejärjestelmää
tukehtumasta tietovarannosta löytyviin binääritiedostoihin, kuten
kuviin.
2024-04-25 12:45:18 +03:00
7 changed files with 197 additions and 43 deletions

View File

@ -26,8 +26,11 @@
#:select? vcs-file?))
(build-system haskell-build-system)
(inputs (list ghc-acid-state
ghc-attoparsec
ghc-base64
ghc-cryptonite
ghc-case-insensitive
ghc-glob
ghc-purebred-email
ghc-optparse-applicative
ghc-filestore

View File

@ -26,7 +26,7 @@ import Web.Scotty (scotty, post, liftAndCatchIO, defaultHandler)
import qualified Data.Text as T
import qualified Data.Text.Lazy.IO as LT
import TiedoteMD.Read
import TiedoteMD.Read (updateMessages)
import TiedoteMD.Review
import TiedoteMD.Send
import TiedoteMD.State

View File

@ -1,35 +1,51 @@
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PackageImports #-}
{-# LANGUAGE RecordWildCards #-}
module TiedoteMD.Read where
import Control.Exception (throwIO)
import Control.Exception (throwIO, catch)
import Control.Monad ((>=>), join, unless)
import Control.Monad.IO.Class (liftIO)
import Control.Monad.IO.Class (liftIO, MonadIO)
import Control.Monad.Reader (ReaderT, MonadReader, runReaderT, asks)
import Control.Monad.Except (MonadError, throwError)
import Control.Monad.Trans (lift)
import Crypto.Hash (hashWith, SHA256(..))
import Data.Acid (AcidState, update)
import Data.Bifunctor (first, second)
import Data.ByteArray (convert)
import Data.ByteString (ByteString)
import "base64" Data.ByteString.Base64.URL
import Data.Default (def)
import Data.Either (rights, lefts)
import Data.FileStore (FileStore(..), gitFileStore)
import Data.List (singleton)
import Data.Either (rights, lefts, fromRight)
import Data.FileStore (FileStore(..), Revision(..), FileStoreError(..), gitFileStore)
import Data.List (singleton, isSuffixOf)
import Data.MIME (ContentID, makeContentID, renderContentID)
import Data.Maybe (fromMaybe)
import Data.Text.Encoding (decodeUtf8, encodeUtf8)
import Data.Time (UTCTime, zonedTimeToUTC)
import Data.Time (getCurrentTime)
import Data.Time.Format.ISO8601 (iso8601ParseM)
import System.Exit (ExitCode(..))
import System.FilePath ((</>), takeDirectory, normalise)
import System.FilePath.Glob (match, compile)
import System.IO (hClose)
import System.Process (createProcess, proc, waitForProcess, CreateProcess(..), StdStream(..))
import Text.Pandoc (Pandoc(..), ReaderOptions(..), WriterOptions(..), Meta(..), MetaValue(..), nullMeta, Inline, Block(Plain), runIO, lookupMeta, runPure, pandocExtensions)
import Text.Pandoc (Pandoc(..), PandocMonad, PandocIO, PandocError(..))
import Text.Pandoc.MediaBag (MediaItem(..), lookupMedia, mediaItems)
import Text.Pandoc.Readers (readMarkdown)
import Text.Pandoc.Walk (walkM)
import Text.Pandoc.Writers (writePlain, writeMarkdown, writeHtml5String)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as LBS
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Text as T
import qualified Data.Text.IO as T
import qualified Text.Pandoc as Pandoc
import TiedoteMD.Git
import TiedoteMD.State
@ -43,22 +59,106 @@ updateMessages acid repoPath =
readMessageFiles :: FileStore -> IO [Message]
readMessageFiles store = do
files <- index store
messages <- mapM (readMessageFile store) files
messages <- mapM (readMessageFile store) $ filter (".md" `isSuffixOf`) files
mapM_ (T.putStrLn . renderError) $ lefts messages
currentTime <- getCurrentTime
print currentTime
pure $ filter ((currentTime <) . sendTime) $ rights messages
-- TODO: files could be read in parallel instead of sequentially with mapM
newtype ReadM a = ReadM (ReaderT ReadMState PandocIO a)
deriving (Functor, Applicative, Monad, MonadIO, MonadReader ReadMState, MonadError PandocError)
data ReadMState = ReadMState
{ fileStoreHandle :: FileStore
, currentFile :: FilePath
}
liftPandoc :: PandocIO a -> ReadM a
liftPandoc = ReadM . lift
runReadM :: FileStore -> FilePath -> ReadM a -> IO (Either PandocError a)
runReadM store file (ReadM m) = Pandoc.runIO $ runReaderT m $ ReadMState store file
readStoreFile :: FilePath -> ReadM LBS.ByteString
readStoreFile file = do
store <- asks fileStoreHandle
errorOrFile <- liftIO $ (Right <$> retrieve store file Nothing)
`catch` \NotFound -> pure $ Left $ PandocResourceNotFound $ T.pack file
either throwError pure errorOrFile
getCurrentDirectory :: ReadM FilePath
getCurrentDirectory = takeDirectory <$> asks currentFile
getRealPath :: FilePath -> ReadM FilePath
getRealPath file = normalise . (</> file) <$> getCurrentDirectory
instance PandocMonad ReadM where
readFileLazy = getRealPath >=> readStoreFile
readFileStrict = getRealPath >=> fmap LBS.toStrict . readStoreFile
getDataFileName = pure
getModificationTime file = do
store <- asks fileStoreHandle
file' <- getRealPath file
liftIO $ fmap revDateTime . revision store =<< latest store file'
fileExists file = do
store <- asks fileStoreHandle
file' <- getRealPath file
liftIO $ (const True <$> latest store file') `catch` \NotFound -> pure False
glob pattern = do
store <- asks fileStoreHandle
directory <- getCurrentDirectory
filter (match $ compile $ directory </> pattern) <$> liftIO (index store)
lookupEnv = liftPandoc . Pandoc.lookupEnv
getCurrentTime = liftPandoc Pandoc.getCurrentTime
getCurrentTimeZone = liftPandoc Pandoc.getCurrentTimeZone
newStdGen = liftPandoc Pandoc.newStdGen
newUniqueHash = liftPandoc Pandoc.newUniqueHash
openURL = liftPandoc . Pandoc.openURL
readStdinStrict = pure mempty
getCommonState = liftPandoc Pandoc.getCommonState
putCommonState = liftPandoc . Pandoc.putCommonState
logOutput = liftPandoc . Pandoc.logOutput
readMessageFile :: FileStore -> FilePath -> IO (Either Error Message)
readMessageFile store = flip (retrieve store) Nothing >=>
uncurry parseMessageFile . addHash . T.filter (/= '\r') . decodeUtf8 . BS.toStrict
readMessageFile store file = retrieve store file Nothing >>=
uncurry (parseMessageFile store file) . addHash . T.filter (/= '\r') . decodeUtf8 . BS.toStrict
where addHash text = (convert $ hashWith SHA256 $ encodeUtf8 text, text)
parseMessageFile :: ByteString -> T.Text -> IO (Either Error Message)
parseMessageFile hash text = fmap (join . first PandocError) . runIO $ do
cidOf :: MediaItem -> ContentID
cidOf MediaItem {mediaContents, mediaMimeType} =
fromRight (error "makeContentID failed with valid input!") $
makeContentID $ "<" <> encodedHash <> "@tiedote.md.sha256>"
where encodedHash = encodeBase64' $ convert $ hashWith SHA256 $
LBS.toStrict mediaContents <> encodeUtf8 mediaMimeType
cidUrl :: ContentID -> T.Text
cidUrl = mappend "cid:" . stripAngleBrackets . decodeUtf8 . renderContentID
where stripAngleBrackets = T.init . T.tail
-- The string is always surrounded by < > so this is safe
makeMediaPart :: MediaItem -> MediaPart
makeMediaPart mediaItem@MediaItem {..} = MediaPart
{ mediaPartMimeType = encodeUtf8 mediaMimeType
, mediaPartContentID = renderContentID $ cidOf mediaItem
, mediaPartContents = LBS.toStrict mediaContents
}
reconstructMediaItem :: (FilePath, T.Text, LBS.ByteString) -> MediaItem
reconstructMediaItem (mediaPath, mediaMimeType, mediaContents) = MediaItem {..}
replaceImagesWithCid :: PandocMonad m => Pandoc -> m Pandoc
replaceImagesWithCid = Pandoc.fillMediaBag >=> walkM handleImage
where handleImage (Pandoc.Image attr lab (src, title)) = do
mediaItem <- (\media -> fromMaybe (error $ "fillMediaBag left an image uncollected! impossible!")
$ lookupMedia (T.unpack src) media) <$> Pandoc.getMediaBag
pure $ Pandoc.Image attr lab (cidUrl $ cidOf mediaItem, title)
handleImage x = pure x
parseMessageFile :: FileStore -> FilePath -> ByteString -> T.Text -> IO (Either Error Message)
parseMessageFile store file hash text = fmap (join . first PandocError) . runReadM store file $ do
pandoc@(Pandoc meta _) <- flip readMarkdown text
def {readerStandalone = True, readerExtensions = pandocExtensions}
def {Pandoc.readerStandalone = True, Pandoc.readerExtensions = Pandoc.pandocExtensions} >>= replaceImagesWithCid
let tiedoteMeta = do
previewTo <- lookupMeta' "tarkistaja" meta >>= metaToEmails
previewTime <- lookupMeta' "deadline" meta >>= metaToTime
@ -71,9 +171,10 @@ parseMessageFile hash text = fmap (join . first PandocError) . runIO $ do
plainTextMessage <- renderHelper writePlain plainTemplate pandoc
markdownMessage <- renderHelper writeMarkdown markdownTemplate pandoc
let htmlPandoc = let Pandoc meta' blocks' = pandoc in Pandoc
(Meta $ Map.insertWith (flip const) "pagetitle" (MetaString subject) $ unMeta meta')
(Pandoc.Meta $ Map.insertWith (flip const) "pagetitle" (Pandoc.MetaString subject) $ Pandoc.unMeta meta')
blocks'
htmlMessage <- liftIO . inlineCSS =<< renderHelper writeHtml5String htmlTemplate htmlPandoc
mediaParts <- Set.fromList . map (makeMediaPart . reconstructMediaItem) . mediaItems <$> Pandoc.getMediaBag
pure $ pure $ Message
{ recipients = []
, messageHash = hash
@ -81,13 +182,12 @@ parseMessageFile hash text = fmap (join . first PandocError) . runIO $ do
, previewMailID = Nothing
, ..
}
where renderHelper writer template = writer (writerOpts {writerTemplate = Just template})
where renderHelper writer template = writer (writerOpts {Pandoc.writerTemplate = Just template})
writerOpts = def
{ writerTOCDepth = 2
, writerTableOfContents = True
, writerSectionDivs = True
{ Pandoc.writerTOCDepth = 2
, Pandoc.writerTableOfContents = True
, Pandoc.writerSectionDivs = True
}
-- TODO: Store the media somewhere
inlineCSS :: T.Text -> IO T.Text
inlineCSS html = do
@ -100,7 +200,7 @@ inlineCSS html = do
unless (exitCode == ExitSuccess) $ throwIO $ ProcessError "douceur" exitCode
pure inlined
metaToTime :: MetaValue -> Either Error UTCTime
metaToTime :: Pandoc.MetaValue -> Either Error UTCTime
metaToTime meta = do
textMeta <- metaToTexts meta >>= headOrError
maybe (Left $ InvalidTime textMeta) pure $
@ -108,21 +208,21 @@ metaToTime meta = do
where headOrError (x:_) = pure x
headOrError _ = Left $ InvalidTime ""
metaToEmails :: MetaValue -> Either Error [Email]
metaToEmails :: Pandoc.MetaValue -> Either Error [Email]
metaToEmails meta = metaToTexts meta >>= mapM
(\text -> maybe (Left $ InvalidEmail text) pure $ email $ encodeUtf8 $ T.strip text)
metaToText :: MetaValue -> Either Error T.Text
metaToText :: Pandoc.MetaValue -> Either Error T.Text
metaToText = second T.unwords . metaToTexts
metaToTexts :: MetaValue -> Either Error [T.Text]
metaToTexts (MetaString text) = pure [text]
metaToTexts (MetaInlines inlines) = second singleton $ inlinesToText inlines
metaToTexts (MetaList metas) = second concat $ mapM metaToTexts metas
metaToTexts :: Pandoc.MetaValue -> Either Error [T.Text]
metaToTexts (Pandoc.MetaString text) = pure [text]
metaToTexts (Pandoc.MetaInlines inlines) = second singleton $ inlinesToText inlines
metaToTexts (Pandoc.MetaList metas) = second concat $ mapM metaToTexts metas
metaToTexts _ = Left InvalidData
inlinesToText :: [Inline] -> Either Error T.Text
inlinesToText = first PandocError . runPure . writeMarkdown def{writerExtensions = pandocExtensions} . Pandoc nullMeta . singleton . Plain
inlinesToText :: [Pandoc.Inline] -> Either Error T.Text
inlinesToText = first PandocError . Pandoc.runPure . writeMarkdown def{Pandoc.writerExtensions = Pandoc.pandocExtensions} . Pandoc Pandoc.nullMeta . singleton . Pandoc.Plain
lookupMeta' :: T.Text -> Meta -> Either Error MetaValue
lookupMeta' key meta = maybe (Left $ NoMeta key) Right $ lookupMeta key meta
lookupMeta' :: T.Text -> Pandoc.Meta -> Either Error Pandoc.MetaValue
lookupMeta' key meta = maybe (Left $ NoMeta key) Right $ Pandoc.lookupMeta key meta

View File

@ -54,7 +54,8 @@ managePreviews acid sender sendmailPath = forever $ do
Just msg@(Message {..}) -> do
mailID <- uniqueMailID sender
boundary <- getStdRandom uniform
let mail = renderMessage' "Esikatselu: " (Just mailID) msg sender boundary
boundary' <- getStdRandom uniform
let mail = renderMessage' "Esikatselu: " (Just mailID) msg sender boundary boundary'
sendmail sendmailPath $ toLazyByteString $ buildMessage $
set (headerTo defaultCharsets) (map (Single . emailToMailbox) previewTo) mail
update acid $ SetPreviewID messageHash mailID

View File

@ -5,12 +5,14 @@ module TiedoteMD.Send where
import Control.Concurrent (threadDelay)
import Control.Exception (throwIO)
import Control.Lens (set)
import Control.Lens (set, _Just)
import Control.Monad (forever, unless)
import Data.Acid (AcidState, query, update)
import Data.Attoparsec.ByteString (endOfInput, parseOnly)
import Data.Binary.Builder (toLazyByteString)
import Data.MIME (Address(..), Mailbox, Boundary, MIMEMessage, MIME(..), Headers(..), MultipartSubtype(..), buildMessage, headerTo, headerSubject, headerFrom, headerMessageID, header, createTextPlainMessage, contentType)
import Data.MIME (Address(..), Mailbox, Boundary, MIMEMessage, MIME(..), Headers(..), MultipartSubtype(..), ContentTypeWith(..), DispositionType(..), buildMessage, headerTo, headerSubject, headerFrom, headerMessageID, headerContentID, header, createTextPlainMessage, contentType, contentDisposition, dispositionType, createAttachment, parseContentType, makeContentID)
import Data.MIME.Charset (defaultCharsets)
import Data.Set (Set)
import Data.Time (getCurrentTime)
import System.Exit (ExitCode(..))
import System.Exit.Codes (codeTempFail)
@ -21,6 +23,7 @@ import System.Random (getStdRandom, uniform)
import qualified Data.ByteString.Lazy as LBS
import qualified Data.IMF as IMF
import qualified Data.List.NonEmpty as NE
import qualified Data.Set as Set
import qualified Data.Text as T
import TiedoteMD.State
@ -42,7 +45,8 @@ manageQueue acid sender sendmailPath = forever $ do
Nothing -> pure ()
Just (address, message) -> do
boundary <- getStdRandom uniform
let mail = renderMessage message sender boundary
boundary' <- getStdRandom uniform
let mail = renderMessage message sender boundary boundary'
sendmail sendmailPath $ toLazyByteString $ buildMessage $
set (headerTo defaultCharsets) [Single $ emailToMailbox address] mail
update acid MarkMessageAsSent
@ -58,11 +62,11 @@ manageQueueingMessages acid = forever $ do
queueMessages :: AcidState State -> IO ()
queueMessages acid = getCurrentTime >>= update acid . MoveToSendQueue
renderMessage :: Message -> Mailbox -> Boundary -> MIMEMessage
renderMessage :: Message -> Mailbox -> Boundary -> Boundary -> MIMEMessage
renderMessage = renderMessage' "" Nothing
renderMessage' :: T.Text -> Maybe MailID -> Message -> Mailbox -> Boundary -> MIMEMessage
renderMessage' subjectPrefix maybeMailID (Message {messageContent = MessageContent {..},..}) sender boundary =
renderMessage' :: T.Text -> Maybe MailID -> Message -> Mailbox -> Boundary -> Boundary -> MIMEMessage
renderMessage' subjectPrefix maybeMailID (Message {messageContent = MessageContent {..},..}) sender boundary boundary' =
maybe id (set headerMessageID . Just . mailIDToMessageID) maybeMailID $
set (headerSubject defaultCharsets) (Just $ subjectPrefix <> subject) $
set (header "Precedence") "Bulk" $
@ -71,12 +75,25 @@ renderMessage' subjectPrefix maybeMailID (Message {messageContent = MessageConte
IMF.Message (Headers []) $ Multipart Alternative boundary $ NE.fromList
[ createTextPlainMessage plainTextMessage
, createTextMarkdownMessage markdownMessage
, createTextHtmlMessage htmlMessage
, createTextHtmlMessage boundary' mediaParts htmlMessage
]
createTextMarkdownMessage :: T.Text -> MIMEMessage
createTextMarkdownMessage = set contentType "text/markdown; charset=utf-8; variant=pandoc" . createTextPlainMessage
createTextHtmlMessage :: T.Text -> MIMEMessage
createTextHtmlMessage = set contentType "text/html; charset=utf-8" . createTextPlainMessage
-- TODO: multipart/related with media
createTextHtmlMessage :: Boundary -> Set MediaPart -> T.Text -> MIMEMessage
createTextHtmlMessage boundary mediaParts html = IMF.Message (Headers []) $ multipartRelated $
(set contentType "text/html; charset=utf-8" $ createTextPlainMessage html) NE.:| mediaAttachments
where multipartRelated = Multipart
(Related (Just $ ContentType "text" "html" ()) Nothing Nothing) boundary
mediaAttachments = mediaPartToAttachment <$> Set.toList mediaParts
mediaPartToAttachment :: MediaPart -> MIMEMessage
mediaPartToAttachment MediaPart {..} =
set (contentDisposition . _Just . dispositionType) Inline $
set headerContentID (Just contentID) $
createAttachment mimeType Nothing mediaPartContents
where mimeType = either (error "purebred-email couldn't parse pandoc's mime type!") id $
parseOnly (parseContentType <* endOfInput) mediaPartMimeType
contentID = either (error "purebred-email couldn't parse it's own contentID!") id $
makeContentID mediaPartContentID

View File

@ -1,11 +1,14 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE TypeFamilies #-}
module TiedoteMD.Types
( Email(..)
, Error(..)
, MailID
, MediaPart(..)
, Message(..)
, MessageContent(..)
, SendJob(..)
@ -28,8 +31,9 @@ import Data.CaseInsensitive (original, mk)
import Data.IMF (MessageID, Mailbox(..), AddrSpec(..), Domain(..), mailbox, parseMessageID, parse, renderMessageID, renderMailbox)
import Data.List.NonEmpty (NonEmpty)
import Data.MIME.Charset (defaultCharsets)
import Data.SafeCopy (base, deriveSafeCopy)
import Data.SafeCopy (Migrate(..), base, deriveSafeCopy, extension)
import Data.Semigroup (sconcat)
import Data.Set (Set)
import Data.Text (Text)
import Data.Text.Encoding (encodeUtf8, decodeUtf8)
import Data.Time (UTCTime, getCurrentTime, defaultTimeLocale, formatTime)
@ -88,6 +92,23 @@ data Message = Message
} deriving (Show, Eq, Typeable)
data MessageContent = MessageContent
{ plainTextMessage :: Text
, markdownMessage :: Text
, htmlMessage :: Text
, mediaParts :: Set MediaPart
} deriving (Show, Eq, Typeable)
data MediaPart = MediaPart
-- XXX Ideally purebred-email would implement safecopy and we could use proper types
{ mediaPartMimeType :: ByteString -- ContentTypeWith Parameters
, mediaPartContentID :: ByteString -- ContentID
, mediaPartContents :: ByteString
} deriving (Show, Typeable, Ord)
instance Eq MediaPart where
MediaPart {mediaPartContentID = a} == MediaPart {mediaPartContentID = b} = a == b
data MessageContent_v0 = MessageContent_v0
{ plainTextMessage :: Text
, markdownMessage :: Text
, htmlMessage :: Text
@ -148,7 +169,14 @@ renderError (FileNotFoundError path) = T.pack path <> " not found"
deriveSafeCopy 0 'base ''Domain'
deriveSafeCopy 0 'base ''Email
deriveSafeCopy 0 'base ''MessageContent
deriveSafeCopy 0 'base ''MessageContent_v0
deriveSafeCopy 0 'base ''MediaPart
instance Migrate MessageContent where
type MigrateFrom MessageContent = MessageContent_v0
migrate MessageContent_v0 {..} = MessageContent {mediaParts = mempty, ..}
deriveSafeCopy 1 'extension ''MessageContent
deriveSafeCopy 0 'base ''MailID
deriveSafeCopy 0 'base ''Message
deriveSafeCopy 0 'base ''SendJob

View File

@ -23,7 +23,9 @@ source-repository head
executable tiedote.md
build-depends:
acid-state,
attoparsec,
base,
base64,
binary,
bytestring,
case-insensitive,
@ -35,7 +37,9 @@ executable tiedote.md
doctemplates,
exit-codes,
file-embed,
filepath,
filestore,
Glob,
hostname,
lens,
memory,
@ -43,6 +47,7 @@ executable tiedote.md
network,
optparse-applicative,
pandoc,
pandoc-types,
process,
purebred-email,
random,