1package queue
2
3import (
4 "bytes"
5 "context"
6 "errors"
7 "fmt"
8 "io"
9 "log/slog"
10 "net"
11 "os"
12 "strings"
13 "sync/atomic"
14 "time"
15
16 "github.com/prometheus/client_golang/prometheus"
17 "github.com/prometheus/client_golang/prometheus/promauto"
18
19 "github.com/mjl-/adns"
20 "github.com/mjl-/bstore"
21
22 "github.com/mjl-/mox/config"
23 "github.com/mjl-/mox/dns"
24 "github.com/mjl-/mox/dsn"
25 "github.com/mjl-/mox/mlog"
26 "github.com/mjl-/mox/mox-"
27 "github.com/mjl-/mox/mtasts"
28 "github.com/mjl-/mox/mtastsdb"
29 "github.com/mjl-/mox/smtp"
30 "github.com/mjl-/mox/smtpclient"
31 "github.com/mjl-/mox/store"
32 "github.com/mjl-/mox/tlsrpt"
33 "github.com/mjl-/mox/webhook"
34)
35
// connectionCounter is increased each time an outgoing connection is about to
// be dialed for direct delivery. Used by dnsbl monitoring to pace querying.
// Read through ConnectionCounter.
var connectionCounter atomic.Int64
39
40var (
41 metricDestinations = promauto.NewCounter(
42 prometheus.CounterOpts{
43 Name: "mox_queue_destinations_total",
44 Help: "Total destination (e.g. MX) lookups for delivery attempts, including those in mox_smtpclient_destinations_authentic_total.",
45 },
46 )
47 metricDestinationsAuthentic = promauto.NewCounter(
48 prometheus.CounterOpts{
49 Name: "mox_queue_destinations_authentic_total",
50 Help: "Destination (e.g. MX) lookups for delivery attempts authenticated with DNSSEC so they are candidates for DANE verification.",
51 },
52 )
53 metricDestinationDANERequired = promauto.NewCounter(
54 prometheus.CounterOpts{
55 Name: "mox_queue_destination_dane_required_total",
56 Help: "Total number of connections to hosts with valid TLSA records making DANE required.",
57 },
58 )
59 metricDestinationDANESTARTTLSUnverified = promauto.NewCounter(
60 prometheus.CounterOpts{
61 Name: "mox_queue_destination_dane_starttlsunverified_total",
62 Help: "Total number of connections with required DANE where all TLSA records were unusable.",
63 },
64 )
65 metricDestinationDANEGatherTLSAErrors = promauto.NewCounter(
66 prometheus.CounterOpts{
67 Name: "mox_queue_destination_dane_gathertlsa_errors_total",
68 Help: "Total number of connections where looking up TLSA records resulted in an error.",
69 },
70 )
71 // todo: recognize when "tls-required-no" message header caused a non-verifying certificate to be overridden. requires doing our own certificate validation after having set tls.Config.InsecureSkipVerify due to tls-required-no.
72 metricTLSRequiredNoIgnored = promauto.NewCounterVec(
73 prometheus.CounterOpts{
74 Name: "mox_queue_tlsrequiredno_ignored_total",
75 Help: "Delivery attempts with TLS policy findings ignored due to message with TLS-Required: No header. Does not cover case where TLS certificate cannot be PKIX-verified.",
76 },
77 []string{
78 "ignored", // mtastspolicy (error getting policy), mtastsmx (mx host not allowed in policy), badtls (error negotiating tls), badtlsa (error fetching dane tlsa records)
79 },
80 )
81 metricRequireTLSUnsupported = promauto.NewCounterVec(
82 prometheus.CounterOpts{
83 Name: "mox_queue_requiretls_unsupported_total",
84 Help: "Delivery attempts that failed due to message with REQUIRETLS.",
85 },
86 []string{
87 "reason", // nopolicy (no mta-sts and no dane), norequiretls (smtp server does not support requiretls)
88 },
89 )
90 metricPlaintextFallback = promauto.NewCounter(
91 prometheus.CounterOpts{
92 Name: "mox_queue_plaintext_fallback_total",
93 Help: "Delivery attempts with fallback to plain text delivery.",
94 },
95 )
96)
97
98func ConnectionCounter() int64 {
99 return connectionCounter.Load()
100}
101
// msgResp pairs a queued message with the SMTP response recorded for it during
// a delivery attempt by deliverHost.
type msgResp struct {
	msg  *Msg                // Message (single recipient) being delivered.
	resp smtpclient.Response // Response recorded for msg; zero value if none was recorded.
}
106
107// Delivery by directly dialing (MX) hosts for destination domain of message.
108//
109// The returned results are for use in a TLSRPT report, it holds success/failure
110// counts and failure details for delivery/connection attempts. The
111// recipientDomainResult is for policies/counts/failures about the whole recipient
112// domain (MTA-STS), its policy type can be empty, in which case there is no
113// information (e.g. internal failure). hostResults are per-host details (DANE, one
114// per MX target).
115func deliverDirect(qlog mlog.Log, resolver dns.Resolver, dialer smtpclient.Dialer, ourHostname dns.Domain, transportName string, transportDirect *config.TransportDirect, msgs []*Msg, backoff time.Duration) (recipientDomainResult tlsrpt.Result, hostResults []tlsrpt.Result) {
116 // High-level approach:
117 // - Resolve domain to deliver to (CNAME), and determine hosts to try to deliver to (MX)
118 // - Get MTA-STS policy for domain (optional). If present, only deliver to its
119 // allowlisted hosts and verify TLS against CA pool.
120 // - For each host, attempt delivery. If the attempt results in a permanent failure
121 // (as claimed by remote with a 5xx SMTP response, or perhaps decided by us), the
122 // attempt can be aborted. Other errors are often temporary and may result in later
123 // successful delivery. But hopefully the delivery just succeeds. For each host:
124 // - If there is an MTA-STS policy, we only connect to allow-listed hosts.
125 // - We try to lookup DANE records (optional) and verify them if present.
126 // - If RequireTLS is true, we only deliver if the remote SMTP server implements it.
127 // - If RequireTLS is false, we'll fall back to regular delivery attempts without
128 // TLS verification and possibly without TLS at all, ignoring recipient domain/host
129 // MTA-STS and DANE policies.
130
131 // For convenience, we use m0 to access properties that are shared over all
132 // messages we are delivering.
133 m0 := msgs[0]
134
135 // Resolve domain and hosts to attempt delivery to.
136 // These next-hop names are often the name under which we find MX records. The
137 // expanded name is different from the original if the original was a CNAME,
138 // possibly a chain. If there are no MX records, it can be an IP or the host
139 // directly.
140 origNextHop := m0.RecipientDomain.Domain
141 ctx := mox.Shutdown
142 haveMX, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, hosts, permanent, err := smtpclient.GatherDestinations(ctx, qlog.Logger, resolver, m0.RecipientDomain)
143 if err != nil {
144 // If this is a DNSSEC authentication error, we'll collect it for TLS reporting.
145 // Hopefully it's a temporary misconfiguration that is solve before we try to send
146 // our report. We don't report as "dnssec-invalid", because that is defined as
147 // being for DANE. ../rfc/8460:580
148 var errCode adns.ErrorCode
149 if errors.As(err, &errCode) && errCode.IsAuthentication() {
150 // Result: ../rfc/8460:567
151 reasonCode := fmt.Sprintf("dns-extended-error-%d-%s", errCode, strings.ReplaceAll(errCode.String(), " ", "-"))
152 fd := tlsrpt.Details(tlsrpt.ResultValidationFailure, reasonCode)
153 recipientDomainResult = tlsrpt.MakeResult(tlsrpt.NoPolicyFound, origNextHop, fd)
154 recipientDomainResult.Summary.TotalFailureSessionCount++
155 }
156 if permanent {
157 err = smtpclient.Error{Permanent: true, Err: err}
158 }
159 failMsgsDB(qlog, msgs, m0.DialedIPs, backoff, dsn.NameIP{}, err)
160 return
161 }
162
163 tlsRequiredNo := m0.RequireTLS != nil && !*m0.RequireTLS
164
165 // Check for MTA-STS policy and enforce it if needed.
166 // We must check at the original next-hop, i.e. recipient domain, not following any
167 // CNAMEs. If we were to follow CNAMEs and ask for MTA-STS at that domain, it
168 // would only take a single CNAME DNS response to direct us to an unrelated domain.
169 var policy *mtasts.Policy // Policy can have mode enforce, testing and none.
170 if !origNextHop.IsZero() {
171 policy, recipientDomainResult, _, err = mtastsdb.Get(ctx, qlog.Logger, resolver, origNextHop)
172 if err != nil {
173 if tlsRequiredNo {
174 qlog.Infox("mtasts lookup temporary error, continuing due to tls-required-no message header", err, slog.Any("domain", origNextHop))
175 metricTLSRequiredNoIgnored.WithLabelValues("mtastspolicy").Inc()
176 } else {
177 qlog.Infox("mtasts lookup temporary error, aborting delivery attempt", err, slog.Any("domain", origNextHop))
178 recipientDomainResult.Summary.TotalFailureSessionCount++
179 failMsgsDB(qlog, msgs, m0.DialedIPs, backoff, dsn.NameIP{}, err)
180 return
181 }
182 }
183 // note: policy can be nil, if a domain does not implement MTA-STS or it's the
184 // first time we fetch the policy and if we encountered an error.
185 }
186
187 // We try delivery to each host until we have success or a permanent failure. So
188 // for transient errors, we'll try the next host. For MX records pointing to a
189 // dual stack host, we turn a permanent failure due to policy on the first delivery
190 // attempt into a temporary failure and make sure to try the other address family
191 // the next attempt. This should reduce issues due to one of our IPs being on a
192 // block list. We won't try multiple IPs of the same address family. Surprisingly,
193 // RFC 5321 does not specify a clear algorithm, but common practice is probably
194 // ../rfc/3974:268.
195 var remoteMTA dsn.NameIP
196 var lastErr = errors.New("no error") // Can be smtpclient.Error.
197 nmissingRequireTLS := 0
198 // todo: should make distinction between host permanently not accepting the message, and the message not being deliverable permanently. e.g. a mx host may have a size limit, or not accept 8bitmime, while another host in the list does accept the message. same for smtputf8, ../rfc/6531:555
199 for _, h := range hosts {
200 // ../rfc/8461:913
201 if policy != nil && policy.Mode != mtasts.ModeNone && !policy.Matches(h.Domain) {
202 // todo: perhaps only send tlsrpt failure if none of the mx hosts matched? reporting about each mismatch seems useful for domain owners, to discover mtasts policies they didn't update after changing mx. there is a risk a domain owner intentionally didn't put all mx'es in the mtasts policy, but they probably won't mind being reported about that.
203 // Other error: Surprising that TLSRPT doesn't have an MTA-STS specific error code
204 // for this case, it's a big part of the reason to have MTA-STS. ../rfc/8460:610
205 // Result: ../rfc/8460:567 todo spec: propose adding a result for this case?
206 fd := tlsrpt.Details(tlsrpt.ResultValidationFailure, "mtasts-policy-mx-mismatch")
207 fd.ReceivingMXHostname = h.Domain.ASCII
208 recipientDomainResult.Add(0, 0, fd)
209
210 var policyHosts []string
211 for _, mx := range policy.MX {
212 policyHosts = append(policyHosts, mx.LogString())
213 }
214 if policy.Mode == mtasts.ModeEnforce {
215 if tlsRequiredNo {
216 qlog.Info("mx host does not match mta-sts policy in mode enforce, ignoring due to tls-required-no message header", slog.Any("host", h.Domain), slog.Any("policyhosts", policyHosts))
217 metricTLSRequiredNoIgnored.WithLabelValues("mtastsmx").Inc()
218 } else {
219 lastErr = fmt.Errorf("mx host %s does not match enforced mta-sts policy with hosts %s", h.Domain, strings.Join(policyHosts, ","))
220 qlog.Error("mx host does not match mta-sts policy in mode enforce, skipping", slog.Any("host", h.Domain), slog.Any("policyhosts", policyHosts))
221 recipientDomainResult.Summary.TotalFailureSessionCount++
222 continue
223 }
224 } else {
225 qlog.Error("mx host does not match mta-sts policy, but it is not enforced, continuing", slog.Any("host", h.Domain), slog.Any("policyhosts", policyHosts))
226 }
227 }
228
229 qlog.Info("delivering to remote", slog.Any("remote", h))
230 nqlog := qlog.WithCid(mox.Cid())
231 var remoteIP net.IP
232
233 enforceMTASTS := policy != nil && policy.Mode == mtasts.ModeEnforce
234 tlsMode := smtpclient.TLSOpportunistic
235 tlsPKIX := false
236 if enforceMTASTS {
237 tlsMode = smtpclient.TLSRequiredStartTLS
238 tlsPKIX = true
239 // note: smtpclient will still go through PKIX verification, and report about it, but not fail the connection if not passing.
240 }
241
242 // Try to deliver to host. We can get various errors back. Like permanent failure
243 // response codes, TCP, DNSSEC, TLS (opportunistic, i.e. optional with fallback to
244 // without), etc. It's a balancing act to handle these situations correctly. We
245 // don't want to bounce unnecessarily. But also not keep trying if there is no
246 // chance of success.
247 //
248 // deliverHost will report generic TLS and MTA-STS-specific failures in
249 // recipientDomainResult. If DANE is encountered, it will add a DANE reporting
250 // result for generic TLS and DANE-specific errors.
251
252 msgResps := make([]*msgResp, len(msgs))
253 for i := range msgs {
254 msgResps[i] = &msgResp{msg: msgs[i]}
255 }
256
257 result := deliverHost(nqlog, resolver, dialer, ourHostname, transportName, transportDirect, h, enforceMTASTS, haveMX, origNextHopAuthentic, origNextHop, expandedNextHopAuthentic, expandedNextHop, msgResps, tlsMode, tlsPKIX, &recipientDomainResult)
258
259 var zerotype tlsrpt.PolicyType
260 if result.hostResult.Policy.Type != zerotype {
261 hostResults = append(hostResults, result.hostResult)
262 }
263
264 // If we had a TLS-related failure when doing TLS, and we don't have a requirement
265 // for MTA-STS/DANE, we try again without TLS. This could be an old server that
266 // only does ancient TLS versions, or has a misconfiguration. Note that
267 // opportunistic TLS does not do regular certificate verification, so that can't be
268 // the problem.
269 // ../rfc/7435:459
270 // We don't fall back to plain text for DMARC reports. ../rfc/7489:1768 ../rfc/7489:2683
271 // We queue outgoing TLS reports with tlsRequiredNo, so reports can be delivered in
272 // case of broken TLS.
273 if result.err != nil && errors.Is(result.err, smtpclient.ErrTLS) && (!enforceMTASTS && tlsMode == smtpclient.TLSOpportunistic && !result.tlsDANE && !m0.IsDMARCReport || tlsRequiredNo) {
274 metricPlaintextFallback.Inc()
275 if tlsRequiredNo {
276 metricTLSRequiredNoIgnored.WithLabelValues("badtls").Inc()
277 }
278
279 // todo future: add a configuration option to not fall back?
280 nqlog.Info("connecting again for delivery attempt without tls",
281 slog.Bool("enforcemtasts", enforceMTASTS),
282 slog.Bool("tlsdane", result.tlsDANE),
283 slog.Any("requiretls", m0.RequireTLS))
284 result = deliverHost(nqlog, resolver, dialer, ourHostname, transportName, transportDirect, h, enforceMTASTS, haveMX, origNextHopAuthentic, origNextHop, expandedNextHopAuthentic, expandedNextHop, msgResps, smtpclient.TLSSkip, false, &tlsrpt.Result{})
285 }
286
287 remoteMTA = dsn.NameIP{Name: h.XString(false), IP: remoteIP}
288 if result.err != nil {
289 lastErr = result.err
290 var cerr smtpclient.Error
291 if errors.As(result.err, &cerr) {
292 if cerr.Secode == smtp.SePol7MissingReqTLS30 {
293 nmissingRequireTLS++
294 }
295 if cerr.Permanent {
296 break
297 }
298 }
299 continue
300 }
301
302 delMsgs := make([]Msg, len(result.delivered))
303 for i, mr := range result.delivered {
304 mqlog := nqlog.With(slog.Int64("msgid", mr.msg.ID), slog.Any("recipient", mr.msg.Recipient()))
305 mqlog.Info("delivered from queue")
306 mr.msg.markResult(0, "", "", true)
307 delMsgs[i] = *mr.msg
308 }
309 if len(delMsgs) > 0 {
310 err := DB.Write(context.Background(), func(tx *bstore.Tx) error {
311 return retireMsgs(nqlog, tx, webhook.EventDelivered, 0, "", nil, delMsgs...)
312 })
313 if err != nil {
314 nqlog.Errorx("deleting messages from queue database after delivery", err)
315 } else if err := removeMsgsFS(nqlog, delMsgs...); err != nil {
316 nqlog.Errorx("removing queued messages from file system after delivery", err)
317 }
318 kick()
319 }
320 if len(result.failed) > 0 {
321 err := DB.Write(context.Background(), func(tx *bstore.Tx) error {
322 for _, mr := range result.failed {
323 failMsgsTx(nqlog, tx, []*Msg{mr.msg}, m0.DialedIPs, backoff, remoteMTA, smtpclient.Error(mr.resp))
324 }
325 return nil
326 })
327 if err != nil {
328 for _, mr := range result.failed {
329 nqlog.Errorx("error processing delivery failure for messages", err,
330 slog.Int64("msgid", mr.msg.ID),
331 slog.Any("recipient", mr.msg.Recipient()))
332 }
333 }
334 kick()
335 }
336 return
337 }
338
339 // In theory, we could make a failure permanent if we didn't find any mx host
340 // matching the mta-sts policy AND the policy is fresh AND all DNS records leading
341 // to the MX targets (including CNAME) have a TTL that is beyond the latest
342 // possible delivery attempt. Until that time, configuration problems can be
343 // corrected through DNS or policy update. Not sure if worth it in practice, there
344 // is a good chance the MX records can still change, at least on initial delivery
345 // failures.
346 // todo: possibly detect that future deliveries will fail due to long ttl's of cached records that are preventing delivery.
347
348 // If we failed due to requiretls not being satisfied, make the delivery permanent.
349 // It is unlikely the recipient domain will implement requiretls during our retry
350 // period. Best to let the sender know immediately.
351 if len(hosts) > 0 && nmissingRequireTLS == len(hosts) {
352 qlog.Info("marking delivery as permanently failed because recipient domain does not implement requiretls")
353 err := smtpclient.Error{
354 Permanent: true,
355 Code: smtp.C554TransactionFailed,
356 Secode: smtp.SePol7MissingReqTLS30,
357 Err: fmt.Errorf("destination servers do not support requiretls"),
358 }
359 failMsgsDB(qlog, msgs, m0.DialedIPs, backoff, remoteMTA, err)
360 return
361 }
362
363 failMsgsDB(qlog, msgs, m0.DialedIPs, backoff, remoteMTA, lastErr)
364 return
365}
366
// deliverResult is the outcome of a single delivery attempt to a host by
// deliverHost.
type deliverResult struct {
	tlsDANE    bool          // Whether gathering TLSA records indicated DANE is in use for the host.
	remoteIP   net.IP        // IP returned by dialing the host, nil if no connection was made.
	hostResult tlsrpt.Result // TLSRPT result for the host, policy type can be the zero value.

	// If err is set, no messages were delivered, and delivered and failed are both
	// nil. If err is not set, delivered and failed always add up to all msgs
	// requested to be sent. All messages can be in failed.
	delivered []*msgResp
	failed    []*msgResp
	err       error
}
379
380// deliverHost attempts to deliver msgs to host. All msgs must have the same
381// delivery requirements (e.g. requiretls). Depending on tlsMode we'll do
382// opportunistic or required STARTTLS or skip TLS entirely. Based on tlsPKIX we do
383// PKIX/WebPKI verification (for MTA-STS). If we encounter DANE records, we verify
384// those. If the message has a message header "TLS-Required: No", we ignore TLS
385// verification errors.
386//
387// deliverHost updates DialedIPs of msgs, which must be saved in case of failure to
388// deliver.
389//
390// The haveMX and next-hop-authentic fields are used to determine if DANE is
391// applicable. The next-hop fields themselves are used to determine valid names
392// during DANE TLS certificate verification.
393//
394// The returned hostResult holds TLSRPT reporting results for the connection
395// attempt. Its policy type can be the zero value, indicating there was no finding
396// (e.g. internal error).
397//
398// deliverHost may send a message multiple times: if the server doesn't accept
399// multiple recipients for a message.
400func deliverHost(log mlog.Log, resolver dns.Resolver, dialer smtpclient.Dialer, ourHostname dns.Domain, transportName string, transportDirect *config.TransportDirect, host dns.IPDomain, enforceMTASTS, haveMX, origNextHopAuthentic bool, origNextHop dns.Domain, expandedNextHopAuthentic bool, expandedNextHop dns.Domain, msgResps []*msgResp, tlsMode smtpclient.TLSMode, tlsPKIX bool, recipientDomainResult *tlsrpt.Result) (result deliverResult) {
401 // About attempting delivery to multiple addresses of a host: ../rfc/5321:3898
402
403 m0 := msgResps[0].msg
404 tlsRequiredNo := m0.RequireTLS != nil && !*m0.RequireTLS
405
406 var tlsDANE bool
407 var remoteIP net.IP
408 var hostResult tlsrpt.Result
409 start := time.Now()
410 defer func() {
411 result.tlsDANE = tlsDANE
412 result.remoteIP = remoteIP
413 result.hostResult = hostResult
414
415 mode := string(tlsMode)
416 if tlsPKIX {
417 mode += "+mtasts"
418 }
419 if tlsDANE {
420 mode += "+dane"
421 }
422
423 r := deliveryResult(result.err, len(result.delivered), len(result.failed))
424 d := float64(time.Since(start)) / float64(time.Second)
425 metricDelivery.WithLabelValues(fmt.Sprintf("%d", m0.Attempts), transportName, mode, r).Observe(d)
426
427 log.Debugx("queue deliverhost result", result.err,
428 slog.Any("host", host),
429 slog.Int("attempt", m0.Attempts),
430 slog.String("result", r),
431 slog.Int("delivered", len(result.delivered)),
432 slog.Int("failed", len(result.failed)),
433 slog.Any("tlsmode", tlsMode),
434 slog.Bool("tlspkix", tlsPKIX),
435 slog.Bool("tlsdane", tlsDANE),
436 slog.Bool("tlsrequiredno", tlsRequiredNo),
437 slog.Bool("badtls", result.err != nil && errors.Is(result.err, smtpclient.ErrTLS)),
438 slog.Duration("duration", time.Since(start)))
439 }()
440
441 // Open message to deliver.
442 f, err := os.Open(m0.MessagePath())
443 if err != nil {
444 return deliverResult{err: fmt.Errorf("open message file: %v", err)}
445 }
446 msgr := store.FileMsgReader(m0.MsgPrefix, f)
447 defer func() {
448 err := msgr.Close()
449 log.Check(err, "closing message after delivery attempt")
450 }()
451
452 ctx, cancel := context.WithTimeout(mox.Shutdown, 30*time.Second)
453 defer cancel()
454
455 // We must lookup the IPs for the host name before checking DANE TLSA records. And
456 // only check TLSA records for secure responses. This prevents problems with old
457 // name servers returning an error for TLSA requests or letting it timeout (not
458 // sending a response). ../rfc/7672:879
459 var daneRecords []adns.TLSA
460 var tlsHostnames []dns.Domain
461 if host.IsDomain() {
462 tlsHostnames = []dns.Domain{host.Domain}
463 }
464 for _, mr := range msgResps {
465 if mr.msg.DialedIPs == nil {
466 mr.msg.DialedIPs = map[string][]net.IP{}
467 }
468 }
469
470 countResultFailure := func() {
471 recipientDomainResult.Summary.TotalFailureSessionCount++
472 hostResult.Summary.TotalFailureSessionCount++
473 }
474
475 metricDestinations.Inc()
476 network := "ip"
477 if transportDirect != nil {
478 if network != transportDirect.IPFamily {
479 log.Debug("set custom IP network family for direct transport", slog.Any("network", transportDirect.IPFamily))
480 network = transportDirect.IPFamily
481 }
482 }
483 authentic, expandedAuthentic, expandedHost, ips, dualstack, err := smtpclient.GatherIPs(ctx, log.Logger, resolver, network, host, m0.DialedIPs)
484 destAuthentic := err == nil && authentic && origNextHopAuthentic && (!haveMX || expandedNextHopAuthentic) && host.IsDomain()
485 if !destAuthentic {
486 log.Debugx("not attempting verification with dane", err, slog.Bool("authentic", authentic), slog.Bool("expandedauthentic", expandedAuthentic))
487
488 // Track a DNSSEC error if found.
489 var errCode adns.ErrorCode
490 if err != nil {
491 if errors.As(err, &errCode) && errCode.IsAuthentication() {
492 // Result: ../rfc/8460:567
493 reasonCode := fmt.Sprintf("dns-extended-error-%d-%s", errCode, strings.ReplaceAll(errCode.String(), " ", "-"))
494 fd := tlsrpt.Details(tlsrpt.ResultValidationFailure, reasonCode)
495 hostResult = tlsrpt.MakeResult(tlsrpt.TLSA, host.Domain, fd)
496 countResultFailure()
497 }
498 } else {
499 // todo: we could lookup tlsa records, and log an error when they are not dnssec-signed. this should be interpreted simply as "not doing dane", but it could be useful to warn domain owners about, they may be under the impression they are dane-protected.
500 hostResult = tlsrpt.MakeResult(tlsrpt.NoPolicyFound, host.Domain)
501 }
502 } else if tlsMode == smtpclient.TLSSkip {
503 metricDestinationsAuthentic.Inc()
504
505 // TLSSkip is used to fallback to plaintext, which is used with a TLS-Required: No
506 // header to ignore the recipient domain's DANE policy.
507
508 // possible err is propagated to below.
509 } else {
510 metricDestinationsAuthentic.Inc()
511
512 // Look for TLSA records in either the expandedHost, or otherwise the original
513 // host. ../rfc/7672:912
514 var tlsaBaseDomain dns.Domain
515 tlsDANE, daneRecords, tlsaBaseDomain, err = smtpclient.GatherTLSA(ctx, log.Logger, resolver, host.Domain, expandedNextHopAuthentic && expandedAuthentic, expandedHost)
516 if tlsDANE {
517 metricDestinationDANERequired.Inc()
518 }
519 if err != nil {
520 metricDestinationDANEGatherTLSAErrors.Inc()
521 }
522 if err == nil && tlsDANE {
523 tlsMode = smtpclient.TLSRequiredStartTLS
524 hostResult = tlsrpt.Result{Policy: tlsrpt.TLSAPolicy(daneRecords, tlsaBaseDomain)}
525 if len(daneRecords) == 0 {
526 // If there are no usable DANE records, we still have to use TLS, but without
527 // verifying its certificate. At least when there is no MTA-STS. Why? Perhaps to
528 // prevent ossification? The SMTP TLSA specification has different behaviour than
529 // the generic TLSA. "Usable" means different things in different places.
530 // ../rfc/7672:718 ../rfc/6698:1845 ../rfc/6698:660
531 log.Debug("no usable dane records, requiring starttls but not verifying with dane")
532 metricDestinationDANESTARTTLSUnverified.Inc()
533 daneRecords = nil
534 // Result: ../rfc/8460:576 (this isn't technicall invalid, only all-unusable...)
535 hostResult.FailureDetails = []tlsrpt.FailureDetails{
536 {
537 ResultType: tlsrpt.ResultTLSAInvalid,
538 ReceivingMXHostname: host.XString(false),
539 FailureReasonCode: "all-unusable-records+ignored",
540 },
541 }
542 } else {
543 log.Debug("delivery with required starttls with dane verification", slog.Any("allowedtlshostnames", tlsHostnames))
544 }
545 // Based on CNAMEs followed and DNSSEC-secure status, we must allow up to 4 host
546 // names.
547 tlsHostnames = smtpclient.GatherTLSANames(haveMX, expandedNextHopAuthentic, expandedAuthentic, origNextHop, expandedNextHop, host.Domain, tlsaBaseDomain)
548 } else if !tlsDANE {
549 log.Debugx("not doing opportunistic dane after gathering tlsa records", err)
550 err = nil
551 hostResult = tlsrpt.MakeResult(tlsrpt.NoPolicyFound, tlsaBaseDomain)
552 } else if err != nil {
553 fd := tlsrpt.Details(tlsrpt.ResultTLSAInvalid, "")
554 var errCode adns.ErrorCode
555 if errors.As(err, &errCode) {
556 fd.FailureReasonCode = fmt.Sprintf("extended-dns-error-%d-%s", errCode, strings.ReplaceAll(errCode.String(), " ", "-"))
557 if errCode.IsAuthentication() {
558 // Result: ../rfc/8460:580
559 fd.ResultType = tlsrpt.ResultDNSSECInvalid
560 countResultFailure()
561 }
562 }
563 hostResult = tlsrpt.Result{
564 Policy: tlsrpt.TLSAPolicy(daneRecords, tlsaBaseDomain),
565 FailureDetails: []tlsrpt.FailureDetails{fd},
566 }
567
568 if tlsRequiredNo {
569 log.Debugx("error gathering dane tlsa records with dane required, but continuing without validation due to tls-required-no message header", err)
570 err = nil
571 metricTLSRequiredNoIgnored.WithLabelValues("badtlsa").Inc()
572 }
573 }
574 // else, err is propagated below.
575 }
576
577 // todo: for requiretls, should an MTA-STS policy in mode testing be treated as good enough for requiretls? let's be strict and assume not.
578 // todo: ../rfc/8689:276 seems to specify stricter requirements on name in certificate than DANE (which allows original recipient domain name and cname-expanded name, and hints at following CNAME for MX targets as well, allowing both their original and expanded names too). perhaps the intent was just to say the name must be validated according to the relevant specifications?
579 // todo: for requiretls, should we allow no usable dane records with requiretls? dane allows it, but doesn't seem in spirit of requiretls, so not allowing it.
580 if err == nil && m0.RequireTLS != nil && *m0.RequireTLS && !(tlsDANE && len(daneRecords) > 0) && !enforceMTASTS {
581 log.Info("verified tls is required, but destination has no usable dane records and no mta-sts policy, canceling delivery attempt to host")
582 metricRequireTLSUnsupported.WithLabelValues("nopolicy").Inc()
583 // Resond with proper enhanced status code. ../rfc/8689:301
584 smtpErr := smtpclient.Error{
585 Code: smtp.C554TransactionFailed,
586 Secode: smtp.SePol7MissingReqTLS30,
587 Err: fmt.Errorf("missing required tls verification mechanism"),
588 }
589 return deliverResult{err: smtpErr}
590 }
591
592 // Dial the remote host given the IPs if no error yet.
593 var conn net.Conn
594 if err == nil {
595 connectionCounter.Add(1)
596 conn, remoteIP, err = smtpclient.Dial(ctx, log.Logger, dialer, host, ips, 25, m0.DialedIPs, mox.Conf.Static.SpecifiedSMTPListenIPs)
597 }
598 cancel()
599
600 // Set error for metrics.
601 var dialResult string
602 switch {
603 case err == nil:
604 dialResult = "ok"
605 case errors.Is(err, os.ErrDeadlineExceeded), errors.Is(err, context.DeadlineExceeded):
606 dialResult = "timeout"
607 case errors.Is(err, context.Canceled):
608 dialResult = "canceled"
609 default:
610 dialResult = "error"
611 }
612 metricConnection.WithLabelValues(dialResult).Inc()
613 if err != nil {
614 log.Debugx("connecting to remote smtp", err, slog.Any("host", host))
615 return deliverResult{err: fmt.Errorf("dialing smtp server: %v", err)}
616 }
617
618 var mailFrom string
619 if m0.SenderLocalpart != "" || !m0.SenderDomain.IsZero() {
620 mailFrom = m0.Sender().XString(m0.SMTPUTF8)
621 }
622
623 // todo future: get closer to timeouts specified in rfc? ../rfc/5321:3610
624 log = log.With(slog.Any("remoteip", remoteIP))
625 ctx, cancel = context.WithTimeout(mox.Shutdown, 30*time.Minute)
626 defer cancel()
627 mox.Connections.Register(conn, "smtpclient", "queue")
628
629 // Initialize SMTP session, sending EHLO/HELO and STARTTLS with specified tls mode.
630 var firstHost dns.Domain
631 var moreHosts []dns.Domain
632 if len(tlsHostnames) > 0 {
633 // For use with DANE-TA.
634 firstHost = tlsHostnames[0]
635 moreHosts = tlsHostnames[1:]
636 }
637 var verifiedRecord adns.TLSA
638 opts := smtpclient.Opts{
639 IgnoreTLSVerifyErrors: tlsRequiredNo,
640 RootCAs: mox.Conf.Static.TLS.CertPool,
641 DANERecords: daneRecords,
642 DANEMoreHostnames: moreHosts,
643 DANEVerifiedRecord: &verifiedRecord,
644 RecipientDomainResult: recipientDomainResult,
645 HostResult: &hostResult,
646 }
647 sc, err := smtpclient.New(ctx, log.Logger, conn, tlsMode, tlsPKIX, ourHostname, firstHost, opts)
648 defer func() {
649 if sc == nil {
650 conn.Close()
651 } else {
652 sc.Close()
653 }
654 mox.Connections.Unregister(conn)
655 }()
656 if err == nil && m0.SenderAccount != "" {
657 // Remember the STARTTLS and REQUIRETLS support for this recipient domain.
658 // It is used in the webmail client, to show the recipient domain security mechanisms.
659 // We always save only the last connection we actually encountered. There may be
660 // multiple MX hosts, perhaps only some support STARTTLS and REQUIRETLS. We may not
661 // be accurate for the whole domain, but we're only storing a hint.
662 rdt := store.RecipientDomainTLS{
663 Domain: m0.RecipientDomain.Domain.Name(),
664 STARTTLS: sc.TLSConnectionState() != nil,
665 RequireTLS: sc.SupportsRequireTLS(),
666 }
667 if err = updateRecipientDomainTLS(ctx, log, m0.SenderAccount, rdt); err != nil {
668 err = fmt.Errorf("storing recipient domain tls status: %w", err)
669 }
670 }
671
672 inspectError := func(err error) error {
673 if cerr, ok := err.(smtpclient.Error); ok {
674 // If we are being rejected due to policy reasons on the first
675 // attempt and remote has both IPv4 and IPv6, we'll give it
676 // another try. Our first IP may be in a block list, the address for
677 // the other family perhaps is not.
678
679 if cerr.Permanent && m0.Attempts == 1 && dualstack && strings.HasPrefix(cerr.Secode, "7.") {
680 log.Debugx("change error type from permanent to transient", err, slog.Any("host", host), slog.Any("secode", cerr.Secode))
681 cerr.Permanent = false
682 }
683 // If server does not implement requiretls, respond with that code. ../rfc/8689:301
684 if errors.Is(cerr.Err, smtpclient.ErrRequireTLSUnsupported) {
685 cerr.Secode = smtp.SePol7MissingReqTLS30
686 metricRequireTLSUnsupported.WithLabelValues("norequiretls").Inc()
687 }
688 return cerr
689 }
690 return err
691 }
692
693 if err != nil {
694 return deliverResult{err: inspectError(err)}
695 }
696
697 // SMTP session is ready. Finally try to actually deliver.
698 has8bit := m0.Has8bit
699 smtputf8 := m0.SMTPUTF8
700 var msg io.Reader = msgr
701 resetReader := msgr.Reset
702 size := m0.Size
703 if m0.DSNUTF8 != nil && sc.Supports8BITMIME() && sc.SupportsSMTPUTF8() {
704 has8bit = true
705 smtputf8 = true
706 size = int64(len(m0.DSNUTF8))
707 msg = bytes.NewReader(m0.DSNUTF8)
708 resetReader = func() {
709 msg = bytes.NewReader(m0.DSNUTF8)
710 }
711 }
712
713 // Try to deliver messages. We'll do multiple transactions if the smtp server responds
714 // with "too many recipients".
715 todo := msgResps
716 var delivered, failed []*msgResp
717 for len(todo) > 0 {
718 resetReader()
719
720 // SMTP server may limit number of recipients in single transaction.
721 n := len(todo)
722 if sc.ExtLimitRcptMax > 0 && sc.ExtLimitRcptMax < len(todo) {
723 n = sc.ExtLimitRcptMax
724 }
725
726 rcpts := make([]string, n)
727 for i, mr := range todo[:n] {
728 rcpts[i] = mr.msg.Recipient().XString(m0.SMTPUTF8)
729 }
730
731 resps, err := sc.DeliverMultiple(ctx, mailFrom, rcpts, size, msg, has8bit, smtputf8, m0.RequireTLS != nil && *m0.RequireTLS)
732 if err != nil && (len(resps) == 0 && n == len(msgResps) || len(resps) == len(msgResps)) {
733 // If error and it applies to all recipients, return a single error.
734 return deliverResult{err: inspectError(err)}
735 }
736 var ntodo []*msgResp
737 for i, mr := range todo[:n] {
738 if err != nil {
739 if cerr, ok := err.(smtpclient.Error); ok {
740 mr.resp = smtpclient.Response(cerr)
741 } else {
742 mr.resp = smtpclient.Response{Err: err}
743 }
744 failed = append(failed, mr)
745 } else if i > 0 && (resps[i].Code == smtp.C452StorageFull || resps[i].Code == smtp.C552MailboxFull) {
746 ntodo = append(ntodo, mr)
747 } else if resps[i].Code == smtp.C250Completed {
748 delivered = append(delivered, mr)
749 } else {
750 failed = append(failed, mr)
751 }
752 }
753 todo = append(ntodo, todo[n:]...)
754
755 // We don't take LIMITS MAILMAX into account. Multiple MAIL commands are normal in
756 // SMTP. If the server doesn't support that, it will likely return a temporary
757 // error. So at least we'll try again. This would be quite unusual. And wasteful,
758 // because we would immediately dial again, do the TLS handshake, EHLO, etc. Let's
759 // implement such a limit when we see it in practice.
760 }
761
762 return deliverResult{delivered: delivered, failed: failed}
763}
764
765// Update (overwite) last known starttls/requiretls support for recipient domain.
766func updateRecipientDomainTLS(ctx context.Context, log mlog.Log, senderAccount string, rdt store.RecipientDomainTLS) error {
767 acc, err := store.OpenAccount(log, senderAccount)
768 if err != nil {
769 return fmt.Errorf("open account: %w", err)
770 }
771 defer func() {
772 err := acc.Close()
773 log.Check(err, "closing account")
774 }()
775 err = acc.DB.Write(ctx, func(tx *bstore.Tx) error {
776 // First delete any existing record.
777 if err := tx.Delete(&store.RecipientDomainTLS{Domain: rdt.Domain}); err != nil && err != bstore.ErrAbsent {
778 return fmt.Errorf("removing previous recipient domain tls status: %w", err)
779 }
780 // Insert new record.
781 return tx.Insert(&rdt)
782 })
783 if err != nil {
784 return fmt.Errorf("adding recipient domain tls status to account database: %w", err)
785 }
786 return nil
787}
788