1// Package queue is in charge of outgoing messages, queueing them when submitted,
2// attempting a first delivery over SMTP, retrying with backoff and sending DSNs
3// for delayed or failed deliveries.
18 "golang.org/x/net/proxy"
20 "github.com/prometheus/client_golang/prometheus"
21 "github.com/prometheus/client_golang/prometheus/promauto"
23 "github.com/mjl-/bstore"
25 "github.com/mjl-/mox/config"
26 "github.com/mjl-/mox/dns"
27 "github.com/mjl-/mox/dsn"
28 "github.com/mjl-/mox/metrics"
29 "github.com/mjl-/mox/mlog"
30 "github.com/mjl-/mox/mox-"
31 "github.com/mjl-/mox/moxio"
32 "github.com/mjl-/mox/smtp"
33 "github.com/mjl-/mox/smtpclient"
34 "github.com/mjl-/mox/store"
35 "github.com/mjl-/mox/tlsrpt"
36 "github.com/mjl-/mox/tlsrptdb"
// xlog is the package-level logger, created with the name "queue".
39var xlog = mlog.New("queue")
// metricConnection is a Prometheus counter vector for outgoing queue client
// connections, labeled by result.
42 metricConnection = promauto.NewCounterVec(
43 prometheus.CounterOpts{
44 Name: "mox_queue_connection_total",
45 Help: "Queue client connections, outgoing.",
48 "result", // "ok", "timeout", "canceled", "error"
// metricDelivery is a Prometheus histogram vector of the duration, in seconds, of
// an SMTP delivery attempt to a single host. Buckets range from 10ms to 120s.
51 metricDelivery = promauto.NewHistogramVec(
52 prometheus.HistogramOpts{
53 Name: "mox_queue_delivery_duration_seconds",
54 Help: "SMTP client delivery attempt to single host.",
55 Buckets: []float64{0.01, 0.05, 0.100, 0.5, 1, 5, 10, 20, 30, 60, 120},
58 "attempt", // Number of attempts.
59 "transport", // empty for default direct delivery.
60 "tlsmode", // immediate, requiredstarttls, opportunistic, skip (from smtpclient.TLSMode), with optional +mtasts and/or +dane.
61 "result", // ok, timeout, canceled, temperror, permerror, error
// jitter is a pseudo-random source. deliver uses it to vary the base backoff by
// a few seconds.
66var jitter = mox.NewPseudoRand()
// DBTypes is passed to bstore.Open by Init; DB is the handle Init assigns.
68var DBTypes = []any{Msg{}} // Types stored in DB.
69var DB *bstore.DB // Exported for making backups.
// NOTE(review): the declaration the next comment documents is not visible in
// this excerpt.
71// Set for mox localserve, to prevent queueing.
74// Msg is a message in the queue.
76// Use MakeMsg to make a message with fields that Add needs. Add will further set
77// queueing related fields.
// NOTE(review): the struct header and several short field declarations are not
// visible in this excerpt.
80 Queued time.Time `bstore:"default now"`
81 SenderAccount string // Failures are delivered back to this local account. Also used for routing.
82 SenderLocalpart smtp.Localpart // Should be a local user and domain.
83 SenderDomain dns.IPDomain
84 RecipientLocalpart smtp.Localpart // Typically a remote user and domain.
85 RecipientDomain dns.IPDomain
// RecipientDomainStr is set by Add to formatIPDomain(RecipientDomain). Kick,
// Drop and the scheduling queries filter on it.
86 RecipientDomainStr string // For filtering.
87 Attempts int // Next attempt is based on last attempt and exponential back off based on attempts.
88 MaxAttempts int // Max number of attempts before giving up. If 0, then the default of 8 attempts is used instead.
89 DialedIPs map[string][]net.IP // For each host, the IPs that were dialed. Used for IP selection for later attempts.
90 NextAttempt time.Time // For scheduling.
91 LastAttempt *time.Time
94 Has8bit bool // Whether message contains bytes with high bit set, determines whether 8BITMIME SMTP extension is needed.
95 SMTPUTF8 bool // Whether message requires use of SMTPUTF8.
96 IsDMARCReport bool // Delivery failures for DMARC reports are handled differently.
97 IsTLSReport bool // Delivery failures for TLS reports are handled differently.
98 Size int64 // Full size of message, combined MsgPrefix with contents of message file.
99 MessageID string // Used when composing a DSN, in its References header.
102 // If set, this message is a DSN and this is a version using utf-8, for the case
103 // the remote MTA supports smtputf8. In this case, Size and MsgPrefix are not
107 // If non-empty, the transport to use for this message. Can be set through cli or
108 // admin interface. If empty (the default for a submitted message), regular routing
112 // RequireTLS influences TLS verification during delivery.
114 // If nil, the recipient domain policy is followed (MTA-STS and/or DANE), falling
115 // back to optional opportunistic non-verified STARTTLS.
117 // If RequireTLS is true (through SMTP REQUIRETLS extension or webmail submit),
118 // MTA-STS or DANE is required, as well as REQUIRETLS support by the next hop
121 // If RequireTLS is false (through message header "TLS-Required: No"), the recipient
122 // domain's policy is ignored if it does not lead to a successful TLS connection,
123 // i.e. falling back to SMTP delivery with unverified STARTTLS or plain text.
128// Sender of message as used in MAIL FROM.
129func (m Msg) Sender() smtp.Path {
130 return smtp.Path{Localpart: m.SenderLocalpart, IPDomain: m.SenderDomain}
133// Recipient of message as used in RCPT TO.
134func (m Msg) Recipient() smtp.Path {
135 return smtp.Path{Localpart: m.RecipientLocalpart, IPDomain: m.RecipientDomain}
138// MessagePath returns the path where the message is stored.
139func (m Msg) MessagePath() string {
140 return mox.DataDirPath(filepath.Join("queue", store.MessagePath(m.ID)))
143// Init opens the queue database without starting delivery.
//
// The database file is queue/index.db under the data directory. Its parent
// directory is created if needed (a MkdirAll error is ignored). The database is
// opened with a 5 second timeout and file permissions 0660, for the types in
// DBTypes, and the handle is stored in the package-level DB.
// NOTE(review): several lines of this function (including the function header)
// are not visible in this excerpt.
145 qpath := mox.DataDirPath(filepath.FromSlash("queue/index.db"))
146 os.MkdirAll(filepath.Dir(qpath), 0770)
// Detect whether the database file does not exist yet.
148 if _, err := os.Stat(qpath); err != nil && os.IsNotExist(err) {
153 DB, err = bstore.Open(mox.Shutdown, qpath, &bstore.Options{Timeout: 5 * time.Second, Perm: 0660}, DBTypes...)
158 return fmt.Errorf("open queue database: %s", err)
163// Shutdown closes the queue database. The delivery process isn't stopped. For tests only.
// An error from closing is only logged.
// NOTE(review): the function header and the close call are not visible in this
// excerpt.
166 xlog.Check(err, "closing queue db")
170// List returns all messages in the delivery queue.
171// Ordered by earliest delivery attempt first.
//
// All messages are fetched from the database and sorted in memory on
// LastAttempt. Messages with and without a LastAttempt are distinguished before
// two set LastAttempt values are compared.
// NOTE(review): the return statements inside the comparison function are not
// visible in this excerpt.
172func List(ctx context.Context) ([]Msg, error) {
173 qmsgs, err := bstore.QueryDB[Msg](ctx, DB).List()
177 sort.Slice(qmsgs, func(i, j int) bool {
// la/lb: whether message a/b has had a delivery attempt.
180 la := a.LastAttempt != nil
181 lb := b.LastAttempt != nil
184 } else if la && !lb {
// Neither attempted yet, or attempted at the same time: a tie on LastAttempt.
187 if !la && !lb || a.LastAttempt.Equal(*b.LastAttempt) {
190 return a.LastAttempt.Before(*b.LastAttempt)
195// Count returns the number of messages in the delivery queue.
196func Count(ctx context.Context) (int, error) {
197 return bstore.QueryDB[Msg](ctx, DB).Count()
200// MakeMsg is a convenience function that sets the commonly used fields for a Msg.
201func MakeMsg(senderAccount string, sender, recipient smtp.Path, has8bit, smtputf8 bool, size int64, messageID string, prefix []byte, requireTLS *bool) Msg {
203 SenderAccount: mox.Conf.Static.Postmaster.Account,
204 SenderLocalpart: sender.Localpart,
205 SenderDomain: sender.IPDomain,
206 RecipientLocalpart: recipient.Localpart,
207 RecipientDomain: recipient.IPDomain,
211 MessageID: messageID,
213 RequireTLS: requireTLS,
217// Add a new message to the queue. The queue is kicked immediately to start a
218// first delivery attempt.
220// ID must be 0 and will be set after inserting in the queue.
222// Add sets derived fields like RecipientDomainStr, and fields related to queueing,
223// such as Queued, NextAttempt, LastAttempt, LastError.
//
// msgFile holds the message contents. It is linked or copied, by file name, to
// the message path in the queue directory. Errors are returned with a
// description of the failing step.
// NOTE(review): a number of short lines of this function (conditions, returns,
// closing braces) are not visible in this excerpt.
224func Add(ctx context.Context, log *mlog.Log, qm *Msg, msgFile *os.File) error {
225 // todo: Add should accept multiple rcptTo if they are for the same domain. so we can queue them for delivery in one (or just a few) session(s), transferring the data only once.
../rfc/5321:3759
228 return fmt.Errorf("id of queued message must be 0")
// The first attempt is scheduled for the moment of queueing.
230 qm.Queued = time.Now()
232 qm.NextAttempt = qm.Queued
235 qm.RecipientDomainStr = formatIPDomain(qm.RecipientDomain)
// Immediate local delivery (the error messages refer to localserve): the
// message is delivered into the sender's own account.
// NOTE(review): the condition guarding this path is not visible here;
// presumably nothing is queued in this case.
238 if qm.SenderAccount == "" {
239 return fmt.Errorf("cannot queue with localserve without local account")
241 acc, err := store.OpenAccount(qm.SenderAccount)
243 return fmt.Errorf("opening sender account for immediate delivery with localserve: %v", err)
247 log.Check(err, "closing account")
249 m := store.Message{Size: qm.Size, MsgPrefix: qm.MsgPrefix}
// The error from acc.Conf is ignored. The destination is looked up by the
// sender address.
250 conf, _ := acc.Conf()
251 dest := conf.Destinations[qm.Sender().String()]
252 acc.WithWLock(func() {
253 err = acc.DeliverDestination(log, dest, &m, msgFile)
256 return fmt.Errorf("delivering message: %v", err)
258 log.Debug("immediately delivered from queue to sender")
// Regular queueing: insert the message in a database transaction. The message
// file is put in place before the transaction is committed.
262 tx, err := DB.Begin(ctx, true)
264 return fmt.Errorf("begin transaction: %w", err)
// A failing rollback is only logged.
268 if err := tx.Rollback(); err != nil {
269 log.Errorx("rollback for queue", err)
274 if err := tx.Insert(qm); err != nil {
278 dst := qm.MessagePath()
// Removal of the destination file.
// NOTE(review): the guard around this cleanup is not visible here; presumably
// it only runs when Add does not complete successfully.
281 err := os.Remove(dst)
282 log.Check(err, "removing destination message file for queue", mlog.Field("path", dst))
// Ensure the destination directory exists (MkdirAll error ignored), link or
// copy the message file into it, then sync the directory.
285 dstDir := filepath.Dir(dst)
286 os.MkdirAll(dstDir, 0770)
287 if err := moxio.LinkOrCopy(log, dst, msgFile.Name(), nil, true); err != nil {
288 return fmt.Errorf("linking/copying message to new file: %s", err)
289 } else if err := moxio.SyncDir(dstDir); err != nil {
290 return fmt.Errorf("sync directory: %v", err)
293 if err := tx.Commit(); err != nil {
294 return fmt.Errorf("commit transaction: %s", err)
// formatIPDomain returns a string form of d: either the IP address between
// square brackets, or the name of the domain. The result is what Add stores in
// RecipientDomainStr and what is used as key for the busy domains.
// NOTE(review): the condition selecting between the two forms is not visible
// in this excerpt.
303func formatIPDomain(d dns.IPDomain) string {
305 return "[" + d.IP.String() + "]"
307 return d.Domain.Name()
// kick is a signaling channel with a buffer of 1.
// deliveryResult carries the formatted recipient domain of a finished delivery
// from deliver to the loop in Start, which then removes it from its busy set.
311 kick = make(chan struct{}, 1)
312 deliveryResult = make(chan string, 1)
// Send on the kick channel.
// NOTE(review): the enclosing function and select statement are not visible in
// this excerpt.
317 case kick <- struct{}{}:
322// Kick sets the NextAttempt for messages matching all filter parameters (ID,
323// toDomain, recipient) that are nonzero, and kicks the queue, attempting delivery
324// of those messages. If all parameters are zero, all messages are kicked. If
325// transport is set, the delivery attempts for the matching messages will use the
326// transport. An empty string is the default transport, i.e. direct delivery.
327// Returns number of messages queued for immediate delivery.
//
// A non-empty transport must exist in the static configuration, otherwise an
// "unknown transport" error is returned and nothing is updated.
// NOTE(review): the conditions around the filters are not visible in this
// excerpt.
328func Kick(ctx context.Context, ID int64, toDomain, recipient string, transport *string) (int, error) {
329 q := bstore.QueryDB[Msg](ctx, DB)
334 q.FilterEqual("RecipientDomainStr", toDomain)
// The recipient filter compares against the recipient address as formatted by
// XString(true).
337 q.FilterFn(func(qm Msg) bool {
338 return qm.Recipient().XString(true) == recipient
// Fields to update on all matching messages: always NextAttempt, and Transport
// only if the caller passed one.
341 up := map[string]any{"NextAttempt": time.Now()}
342 if transport != nil {
343 if *transport != "" {
344 _, ok := mox.Conf.Static.Transports[*transport]
346 return 0, fmt.Errorf("unknown transport %q", *transport)
349 up["Transport"] = *transport
351 n, err := q.UpdateFields(up)
353 return 0, fmt.Errorf("selecting and updating messages in queue: %v", err)
359// Drop removes messages from the queue that match all nonzero parameters.
360// If all parameters are zero, all messages are removed.
361// Returns number of messages removed.
//
// For each removed message, its message file is also removed from the file
// system. A failure to remove a file is logged.
// NOTE(review): the conditions around the filters and the delete call itself
// are not visible in this excerpt.
362func Drop(ctx context.Context, ID int64, toDomain string, recipient string) (int, error) {
363 q := bstore.QueryDB[Msg](ctx, DB)
368 q.FilterEqual("RecipientDomainStr", toDomain)
// The recipient filter compares against the recipient address as formatted by
// XString(true).
371 q.FilterFn(func(qm Msg) bool {
372 return qm.Recipient().XString(true) == recipient
379 return 0, fmt.Errorf("selecting and deleting messages from queue: %v", err)
381 for _, m := range msgs {
383 if err := os.Remove(p); err != nil {
384 xlog.WithContext(ctx).Errorx("removing queue message from file system", err, mlog.Field("queuemsgid", m.ID), mlog.Field("path", p))
390// SaveRequireTLS updates the RequireTLS field of the message with id.
//
// The message is read and modified inside a single DB.Write transaction. An
// error is returned if the message cannot be fetched.
// NOTE(review): the statement that stores the modified message is not visible
// in this excerpt.
391func SaveRequireTLS(ctx context.Context, id int64, requireTLS *bool) error {
392 return DB.Write(ctx, func(tx *bstore.Tx) error {
394 if err := tx.Get(&m); err != nil {
395 return fmt.Errorf("get message: %w", err)
397 m.RequireTLS = requireTLS
// ReadReaderAtCloser is the type returned by OpenMessage for reading a queued
// message.
// NOTE(review): the interface's members are not visible in this excerpt.
402type ReadReaderAtCloser interface {
407// OpenMessage opens a message present in the queue.
//
// The message with the given id is fetched from the database, its message file
// is opened, and a reader is created from the message's MsgPrefix and the file
// with store.FileMsgReader.
// NOTE(review): the error check after the database lookup and the final return
// are not visible in this excerpt.
408func OpenMessage(ctx context.Context, id int64) (ReadReaderAtCloser, error) {
410 err := DB.Get(ctx, &qm)
414 f, err := os.Open(qm.MessagePath())
416 return nil, fmt.Errorf("open message file: %s", err)
418 r := store.FileMsgReader(qm.MsgPrefix, f)
// maxConcurrentDeliveries bounds delivery concurrency: the loop in Start checks
// the number of busy domains against it before launching work, and launchWork
// uses it as the limit on messages selected per round.
422const maxConcurrentDeliveries = 10
424// Start opens the database by calling Init, then starts the delivery process.
//
// The delivery loop keeps a set of busy recipient domains. Entries are added by
// launchWork and removed when a domain arrives on deliveryResult. After each
// round of launching work, the timer is reset to the duration returned by
// nextWork.
// NOTE(review): parts of the loop, including how the done channel is used, are
// not visible in this excerpt.
425func Start(resolver dns.Resolver, done chan struct{}) error {
426 if err := Init(); err != nil {
432 // Map keys are either dns.Domain.Name()'s, or string-formatted IP addresses.
433 busyDomains := map[string]struct{}{}
// Timer with a zero duration; it is rescheduled after every round of work.
435 timer := time.NewTimer(0)
// Shutdown was requested.
439 case <-mox.Shutdown.Done():
// A delivery finished: its recipient domain is no longer busy.
444 case domain := <-deliveryResult:
445 delete(busyDomains, domain)
// Check against the concurrency limit before launching more work.
448 if len(busyDomains) >= maxConcurrentDeliveries {
452 launchWork(resolver, busyDomains)
453 timer.Reset(nextWork(mox.Shutdown, busyDomains))
// nextWork returns the duration until the earliest NextAttempt among queued
// messages whose RecipientDomainStr is not in busyDomains.
//
// If there is no such message (bstore.ErrAbsent), 24 hours is returned. For any
// other error the error is logged and 1 minute is returned. The returned
// duration comes from time.Until, so it can be zero or negative for a message
// that is already due.
// NOTE(review): the statement executing the query is not visible in this
// excerpt.
459func nextWork(ctx context.Context, busyDomains map[string]struct{}) time.Duration {
460 q := bstore.QueryDB[Msg](ctx, DB)
// Exclude messages for domains that currently have a delivery in progress.
461 if len(busyDomains) > 0 {
463 for d := range busyDomains {
464 doms = append(doms, d)
466 q.FilterNotEqual("RecipientDomainStr", doms...)
468 q.SortAsc("NextAttempt")
471 if err == bstore.ErrAbsent {
472 return 24 * time.Hour
473 } else if err != nil {
474 xlog.Errorx("finding time for next delivery attempt", err)
475 return 1 * time.Minute
477 return time.Until(qm.NextAttempt)
// launchWork starts a deliver goroutine for each message that is due for
// delivery (NextAttempt not after now) and whose recipient domain is not busy.
// Messages are taken in ascending NextAttempt order, at most
// maxConcurrentDeliveries per call. The recipient domain of each launched
// message is added to busyDomains.
//
// If the query fails, the error is logged, followed by a sleep of 1 second via
// mox.Sleep.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably the result is the number of deliveries launched.
480func launchWork(resolver dns.Resolver, busyDomains map[string]struct{}) int {
481 q := bstore.QueryDB[Msg](mox.Shutdown, DB)
482 q.FilterLessEqual("NextAttempt", time.Now())
483 q.SortAsc("NextAttempt")
484 q.Limit(maxConcurrentDeliveries)
// Exclude messages for domains that currently have a delivery in progress.
485 if len(busyDomains) > 0 {
487 for d := range busyDomains {
488 doms = append(doms, d)
490 q.FilterNotEqual("RecipientDomainStr", doms...)
492 msgs, err := q.List()
494 xlog.Errorx("querying for work in queue", err)
495 mox.Sleep(mox.Shutdown, 1*time.Second)
// Mark the domain busy before starting the goroutine. The key has the same
// format as the value deliver sends on deliveryResult.
499 for _, m := range msgs {
500 busyDomains[formatIPDomain(m.RecipientDomain)] = struct{}{}
501 go deliver(resolver, m)
506// Remove message from queue in database and file system.
//
// The database record is deleted first, then the message file. The file path is
// built the same way as in Msg.MessagePath. A failure to remove the file is
// returned as an error.
507func queueDelete(ctx context.Context, msgID int64) error {
508 if err := DB.Delete(ctx, &Msg{ID: msgID}); err != nil {
511 // If removing from database fails, we'll also leave the file in the file system.
513 p := mox.DataDirPath(filepath.Join("queue", store.MessagePath(msgID)))
514 if err := os.Remove(p); err != nil {
515 return fmt.Errorf("removing queue message from file system: %v", err)
521// deliver attempts to deliver a message.
522// The queue is updated, either by removing a delivered or permanently failed
523// message, or updating the time for the next attempt. A DSN may be sent.
//
// Outline, as far as visible in this excerpt:
//   - the attempt is recorded in the database with a next attempt time based on
//     exponential backoff;
//   - the transport is chosen: the one set on the message, or else the one from
//     the route found with findRoute;
//   - TLS results gathered during delivery are stored with
//     tlsrptdb.AddTLSResults;
//   - delivery is done with deliverSubmit (transport with Submissions,
//     Submission or SMTP set) or with deliverDirect (otherwise, optionally
//     through a SOCKS5 dialer).
//
// NOTE(review): many short lines of this function (conditions, returns, defer
// and closing lines) are not visible in this excerpt.
524func deliver(resolver dns.Resolver, m Msg) {
526 qlog := xlog.WithCid(cid).Fields(mlog.Field("from", m.Sender()), mlog.Field("recipient", m.Recipient()), mlog.Field("attempts", m.Attempts), mlog.Field("msgid", m.ID))
// Tell the scheduling loop in Start that delivery for this recipient domain is
// done.
529 deliveryResult <- formatIPDomain(m.RecipientDomain)
// A panic is logged and counted in the panic metrics for the queue.
533 qlog.Error("deliver panic", mlog.Field("panic", x))
535 metrics.PanicInc(metrics.Queue)
539 // We register this attempt by setting last_attempt, and already next_attempt time
540 // in the future with exponential backoff. If we run into trouble delivery below,
541 // at least we won't be bothering the receiving server with our problems.
542 // Delivery attempts: immediately, 7.5m, 15m, 30m, 1h, 2h (send delayed DSN), 4h,
543 // 8h, 16h (send permanent failure DSN).
// Base backoff of 7m30s, varied by a few seconds of jitter, and doubled for
// each earlier attempt.
546 backoff := time.Duration(7*60+30+jitter.Intn(10)-5) * time.Second
547 for i := 0; i < m.Attempts; i++ {
548 backoff *= time.Duration(2)
553 m.NextAttempt = now.Add(backoff)
554 qup := bstore.QueryDB[Msg](mox.Shutdown, DB)
// Only the attempt bookkeeping fields are passed to UpdateNonzero.
556 update := Msg{Attempts: m.Attempts, NextAttempt: m.NextAttempt, LastAttempt: m.LastAttempt}
557 if _, err := qup.UpdateNonzero(update); err != nil {
558 qlog.Errorx("storing delivery attempt", err)
562 // Find route for transport to use for delivery attempt.
563 var transport config.Transport
564 var transportName string
// A transport set explicitly on the message takes precedence over routes. It
// must exist in the static configuration, otherwise the delivery is failed.
565 if m.Transport != "" {
567 transport, ok = mox.Conf.Static.Transports[m.Transport]
570 fail(qlog, m, backoff, false, remoteMTA, "", fmt.Sprintf("cannot find transport %q", m.Transport))
573 transportName = m.Transport
575 route := findRoute(m.Attempts-1, m)
576 transport = route.ResolvedTransport
577 transportName = route.Transport
580 if transportName != "" {
581 qlog = qlog.Fields(mlog.Field("transport", transportName))
582 qlog.Debug("delivering with transport", mlog.Field("transport", transportName))
585 // We gather TLS connection successes and failures during delivery, and we store
586 // them in tlsrptdb. Every 24 hours we send an email with a report to the recipient
587 // domains that opt in via a TLSRPT DNS record. For us, the tricky part is
588 // collecting all reporting information. We've got several TLS modes
589 // (opportunistic, DANE and/or MTA-STS (PKIX), overrides due to Require TLS).
590 // Failures can happen at various levels: MTA-STS policies (apply to whole delivery
591 // attempt/domain), MX targets (possibly multiple per delivery attempt, both for
592 // MTA-STS and DANE).
594 // Once the SMTP client has tried a TLS handshake, we register success/failure,
595 // regardless of what happens next on the connection. We also register failures
596 // when they happen before we get to the SMTP client, but only if they are related
597 // to TLS (and some DNSSEC).
598 var recipientDomainResult tlsrpt.Result
599 var hostResults []tlsrpt.Result
// NOTE(review): the body of this condition is not visible here; presumably no
// TLS results are stored when outgoing TLS reports are disabled or the
// recipient is an IP address.
601 if mox.Conf.Static.NoOutgoingTLSReports || m.RecipientDomain.IsIP() {
// Results are stored per UTC day, formatted as YYYYMMDD.
606 dayUTC := now.UTC().Format("20060102")
608 // See if this contains a failure. If not, we'll mark TLS results for delivering
609 // DMARC reports SendReport false, so we won't as easily get into a report sending
612 for _, result := range hostResults {
613 if result.Summary.TotalFailureSessionCount > 0 {
618 if recipientDomainResult.Summary.TotalFailureSessionCount > 0 {
// At most one result for the recipient domain plus one per host.
622 results := make([]tlsrptdb.TLSResult, 0, 1+len(hostResults))
623 tlsaPolicyDomains := map[string]bool{}
// addResult appends r to results as a tlsrptdb.TLSResult. A policy domain that
// fails to parse is logged.
624 addResult := func(r tlsrpt.Result, isHost bool) {
// Compare against the zero policy type, i.e. a result that was never filled in.
625 var zerotype tlsrpt.PolicyType
626 if r.Policy.Type == zerotype {
630 // Ensure we store policy domain in unicode in database.
631 policyDomain, err := dns.ParseDomain(r.Policy.Domain)
633 qlog.Errorx("parsing policy domain for tls result", err, mlog.Field("policydomain", r.Policy.Domain))
// Remember domains with a TLSA policy, by ASCII name, for the check on the
// recipient domain result below.
637 if r.Policy.Type == tlsrpt.TLSA {
638 tlsaPolicyDomains[policyDomain.ASCII] = true
641 tlsResult := tlsrptdb.TLSResult{
642 PolicyDomain: policyDomain.Name(),
644 RecipientDomain: m.RecipientDomain.Domain.Name(),
// No report is requested for deliveries of TLS reports themselves, and for
// deliveries of DMARC reports only if a failure was seen.
646 SendReport: !m.IsTLSReport && (!m.IsDMARCReport || failure),
647 Results: []tlsrpt.Result{r},
649 results = append(results, tlsResult)
651 for _, result := range hostResults {
652 addResult(result, true)
654 // If we were delivering to a mail host directly (not a domain with MX records), we
655 // are more likely to get a TLSA policy than an STS policy. Don't potentially
656 // confuse operators with both a tlsa and no-policy-found result.
658 if recipientDomainResult.Policy.Type != tlsrpt.NoPolicyFound || !tlsaPolicyDomains[recipientDomainResult.Policy.Domain] {
659 addResult(recipientDomainResult, false)
// An error while storing the results is only logged.
662 if len(results) > 0 {
663 err := tlsrptdb.AddTLSResults(context.Background(), results)
664 qlog.Check(err, "adding tls results to database for upcoming tlsrpt report")
// Pick the delivery method based on which transport option is set. The
// deliverSubmit calls are passed ports 465, 587 and 25 respectively.
668 var dialer smtpclient.Dialer = &net.Dialer{}
669 if transport.Submissions != nil {
670 deliverSubmit(cid, qlog, resolver, dialer, m, backoff, transportName, transport.Submissions, true, 465)
671 } else if transport.Submission != nil {
672 deliverSubmit(cid, qlog, resolver, dialer, m, backoff, transportName, transport.Submission, false, 587)
673 } else if transport.SMTP != nil {
674 // todo future: perhaps also gather tlsrpt results for submissions.
675 deliverSubmit(cid, qlog, resolver, dialer, m, backoff, transportName, transport.SMTP, false, 25)
// Direct delivery. With a SOCKS transport, a SOCKS5 dialer is created and the
// hostname configured for the SOCKS transport is used instead of our own.
677 ourHostname := mox.Conf.Static.HostnameDomain
678 if transport.Socks != nil {
679 socksdialer, err := proxy.SOCKS5("tcp", transport.Socks.Address, nil, &net.Dialer{})
681 fail(qlog, m, backoff, false, dsn.NameIP{}, "", fmt.Sprintf("socks dialer: %v", err))
// The SOCKS dialer must also implement smtpclient.Dialer.
683 } else if d, ok := socksdialer.(smtpclient.Dialer); !ok {
684 fail(qlog, m, backoff, false, dsn.NameIP{}, "", "socks dialer is not a contextdialer")
689 ourHostname = transport.Socks.Hostname
// The results returned here are the ones stored for TLS reporting above.
691 recipientDomainResult, hostResults = deliverDirect(cid, qlog, resolver, dialer, ourHostname, transportName, m, backoff)
// findRoute looks for a route for message m at the given attempt. The routes
// for the sender account are tried first, then those for the sender domain,
// then the global routes. If none match, a zero config.Route is returned.
// NOTE(review): the return statements for a match are not visible in this
// excerpt; presumably the first matching route is returned.
695func findRoute(attempt int, m Msg) config.Route {
696 routesAccount, routesDomain, routesGlobal := mox.Conf.Routes(m.SenderAccount, m.SenderDomain.Domain)
697 if r, ok := findRouteInList(attempt, m, routesAccount); ok {
700 if r, ok := findRouteInList(attempt, m, routesDomain); ok {
703 if r, ok := findRouteInList(attempt, m, routesGlobal); ok {
706 return config.Route{}
// findRouteInList goes through routes in order, testing each with routeMatch.
// If no route matches, a zero config.Route and false are returned.
// NOTE(review): the return statement for a match is not visible in this
// excerpt; presumably it returns the matching route and true.
709func findRouteInList(attempt int, m Msg, routes []config.Route) (config.Route, bool) {
710 for _, r := range routes {
711 if routeMatch(attempt, m, r) {
715 return config.Route{}, false
718func routeMatch(attempt int, m Msg, r config.Route) bool {
719 return attempt >= r.MinimumAttempts && routeMatchDomain(r.FromDomainASCII, m.SenderDomain.Domain) && routeMatchDomain(r.ToDomainASCII, m.RecipientDomain.Domain)
722func routeMatchDomain(l []string, d dns.Domain) bool {
726 for _, e := range l {
727 if d.ASCII == e || strings.HasPrefix(e, ".") && (d.ASCII == e[1:] || strings.HasSuffix(d.ASCII, e)) {