1package queue
2
3import (
4 "bytes"
5 "context"
6 "errors"
7 "fmt"
8 "io"
9 "net"
10 "os"
11 "strings"
12 "time"
13
14 "github.com/mjl-/bstore"
15
16 "github.com/mjl-/mox/dns"
17 "github.com/mjl-/mox/dsn"
18 "github.com/mjl-/mox/mlog"
19 "github.com/mjl-/mox/mox-"
20 "github.com/mjl-/mox/mtasts"
21 "github.com/mjl-/mox/mtastsdb"
22 "github.com/mjl-/mox/smtpclient"
23 "github.com/mjl-/mox/store"
24)
25
26// todo: rename function, perhaps put some of the params in a delivery struct so we don't pass all the params all the time?
27func fail(qlog *mlog.Log, m Msg, backoff time.Duration, permanent bool, remoteMTA dsn.NameIP, secodeOpt, errmsg string) {
28 if permanent || m.Attempts >= 8 {
29 qlog.Errorx("permanent failure delivering from queue", errors.New(errmsg))
30 queueDSNFailure(qlog, m, remoteMTA, secodeOpt, errmsg)
31
32 if err := queueDelete(context.Background(), m.ID); err != nil {
33 qlog.Errorx("deleting message from queue after permanent failure", err)
34 }
35 return
36 }
37
38 qup := bstore.QueryDB[Msg](context.Background(), DB)
39 qup.FilterID(m.ID)
40 if _, err := qup.UpdateNonzero(Msg{LastError: errmsg, DialedIPs: m.DialedIPs}); err != nil {
41 qlog.Errorx("storing delivery error", err, mlog.Field("deliveryerror", errmsg))
42 }
43
44 if m.Attempts == 5 {
45 // We've attempted deliveries at these intervals: 0, 7.5m, 15m, 30m, 1h, 2u.
46 // Let sender know delivery is delayed.
47 qlog.Errorx("temporary failure delivering from queue, sending delayed dsn", errors.New(errmsg), mlog.Field("backoff", backoff))
48
49 retryUntil := m.LastAttempt.Add((4 + 8 + 16) * time.Hour)
50 queueDSNDelay(qlog, m, remoteMTA, secodeOpt, errmsg, retryUntil)
51 } else {
52 qlog.Errorx("temporary failure delivering from queue", errors.New(errmsg), mlog.Field("backoff", backoff), mlog.Field("nextattempt", m.NextAttempt))
53 }
54}
55
56// Delivery by directly dialing MX hosts for destination domain.
57func deliverDirect(cid int64, qlog *mlog.Log, resolver dns.Resolver, dialer contextDialer, ourHostname dns.Domain, transportName string, m Msg, backoff time.Duration) {
58 hosts, effectiveDomain, permanent, err := gatherHosts(resolver, m, cid, qlog)
59 if err != nil {
60 fail(qlog, m, backoff, permanent, dsn.NameIP{}, "", err.Error())
61 return
62 }
63
64 // Check for MTA-STS policy and enforce it if needed. We have to check the
65 // effective domain (found after following CNAME record(s)): there will certainly
66 // not be an mtasts record for the original recipient domain, because that is not
67 // allowed when a CNAME record is present.
68 var policyFresh bool
69 var policy *mtasts.Policy
70 tlsModeDefault := smtpclient.TLSOpportunistic
71 if !effectiveDomain.IsZero() {
72 cidctx := context.WithValue(mox.Shutdown, mlog.CidKey, cid)
73 policy, policyFresh, err = mtastsdb.Get(cidctx, resolver, effectiveDomain)
74 if err != nil {
75 // No need to refuse to deliver if we have some mtasts error.
76 qlog.Infox("mtasts failed, continuing with strict tls requirement", err, mlog.Field("domain", effectiveDomain))
77 tlsModeDefault = smtpclient.TLSStrictStartTLS
78 }
79 // note: policy can be nil, if a domain does not implement MTA-STS or its the first
80 // time we fetch the policy and if we encountered an error.
81 }
82
83 // We try delivery to each record until we have success or a permanent failure. So
84 // for transient errors, we'll try the next MX record. For MX records pointing to a
85 // dual stack host, we turn a permanent failure due to policy on the first delivery
86 // attempt into a temporary failure and make sure to try the other address family
87 // the next attempt. This should reduce issues due to one of our IPs being on a
88 // block list. We won't try multiple IPs of the same address family. Surprisingly,
89 // RFC 5321 does not specify a clear algorithm, but common practicie is probably
90 // ../rfc/3974:268.
91 var remoteMTA dsn.NameIP
92 var secodeOpt, errmsg string
93 permanent = false
94 mtastsFailure := true
95 // todo: should make distinction between host permanently not accepting the message, and the message not being deliverable permanently. e.g. a mx host may have a size limit, or not accept 8bitmime, while another host in the list does accept the message. same for smtputf8, ../rfc/6531:555
96 for _, h := range hosts {
97 var badTLS, ok bool
98
99 // ../rfc/8461:913
100 if policy != nil && policy.Mode == mtasts.ModeEnforce && !policy.Matches(h.Domain) {
101 var policyHosts []string
102 for _, mx := range policy.MX {
103 policyHosts = append(policyHosts, mx.LogString())
104 }
105 errmsg = fmt.Sprintf("mx host %s does not match enforced mta-sts policy with hosts %s", h.Domain, strings.Join(policyHosts, ","))
106 qlog.Error("mx host does not match enforce mta-sts policy, skipping", mlog.Field("host", h.Domain), mlog.Field("policyhosts", policyHosts))
107 continue
108 }
109
110 qlog.Info("delivering to remote", mlog.Field("remote", h), mlog.Field("queuecid", cid))
111 cid := mox.Cid()
112 nqlog := qlog.WithCid(cid)
113 var remoteIP net.IP
114 tlsMode := tlsModeDefault
115 if policy != nil && policy.Mode == mtasts.ModeEnforce {
116 tlsMode = smtpclient.TLSStrictStartTLS
117 }
118 permanent, badTLS, secodeOpt, remoteIP, errmsg, ok = deliverHost(nqlog, resolver, dialer, cid, ourHostname, transportName, h, &m, tlsMode)
119 if !ok && badTLS && tlsMode == smtpclient.TLSOpportunistic {
120 // In case of failure with opportunistic TLS, try again without TLS. ../rfc/7435:459
121 // todo future: revisit this decision. perhaps it should be a configuration option that defaults to not doing this?
122 nqlog.Info("connecting again for delivery attempt without tls")
123 permanent, badTLS, secodeOpt, remoteIP, errmsg, ok = deliverHost(nqlog, resolver, dialer, cid, ourHostname, transportName, h, &m, smtpclient.TLSSkip)
124 }
125 if ok {
126 nqlog.Info("delivered from queue")
127 if err := queueDelete(context.Background(), m.ID); err != nil {
128 nqlog.Errorx("deleting message from queue after delivery", err)
129 }
130 return
131 }
132 remoteMTA = dsn.NameIP{Name: h.XString(false), IP: remoteIP}
133 if !badTLS {
134 mtastsFailure = false
135 }
136 if permanent {
137 break
138 }
139 }
140 if mtastsFailure && policyFresh {
141 permanent = true
142 }
143
144 fail(qlog, m, backoff, permanent, remoteMTA, secodeOpt, errmsg)
145}
146
// Sentinel errors wrapped in the errors returned by gatherHosts, listed in the
// order in which gatherHosts can run into them.
var (
	// A domain showed up a second time while following CNAME records.
	errCNAMELoop = errors.New("cname loop")

	// The maximum number of CNAME records to follow was reached.
	errCNAMELimit = errors.New("too many cname records")

	// A DNS lookup failed, or a name from DNS could not be parsed.
	errDNS = errors.New("dns lookup error")

	// The domain has no MX record, no CNAME record and no host record.
	errNoRecord = errors.New("no dns record")

	// The domain has a single MX record with host ".". ../rfc/7505:122
	errNoMail = errors.New("domain does not accept email as indicated with single dot for mx record")
)
154
155// Gather hosts to try to deliver to. We start with the straight-forward MX record.
156// If that does not exist, we'll look for CNAME of the entire domain (following
157// chains if needed). If a CNAME does not exist, but the domain name has an A or
158// AAAA record, we'll try delivery directly to that host.
159// ../rfc/5321:3824
160func gatherHosts(resolver dns.Resolver, m Msg, cid int64, qlog *mlog.Log) (hosts []dns.IPDomain, effectiveDomain dns.Domain, permanent bool, err error) {
161 if len(m.RecipientDomain.IP) > 0 {
162 return []dns.IPDomain{m.RecipientDomain}, effectiveDomain, false, nil
163 }
164
165 // We start out delivering to the recipient domain. We follow CNAMEs a few times.
166 rcptDomain := m.RecipientDomain.Domain
167 // Domain we are actually delivering to, after following CNAME record(s).
168 effectiveDomain = rcptDomain
169 domainsSeen := map[string]bool{}
170 for i := 0; ; i++ {
171 if domainsSeen[effectiveDomain.ASCII] {
172 return nil, effectiveDomain, true, fmt.Errorf("%w: recipient domain %s: already saw %s", errCNAMELoop, rcptDomain, effectiveDomain)
173 }
174 domainsSeen[effectiveDomain.ASCII] = true
175
176 // note: The Go resolver returns the requested name if the domain has no CNAME record but has a host record.
177 if i == 16 {
178 // We have a maximum number of CNAME records we follow. There is no hard limit for
179 // DNS, and you might think folks wouldn't configure CNAME chains at all, but for
180 // (non-mail) domains, CNAME chains of 10 records have been encountered according
181 // to the internet.
182 return nil, effectiveDomain, true, fmt.Errorf("%w: recipient domain %s, last resolved domain %s", errCNAMELimit, rcptDomain, effectiveDomain)
183 }
184
185 cidctx := context.WithValue(mox.Context, mlog.CidKey, cid)
186 ctx, cancel := context.WithTimeout(cidctx, 30*time.Second)
187 defer cancel()
188 // Note: LookupMX can return an error and still return records: Invalid records are
189 // filtered out and an error returned. We must process any records that are valid.
190 // Only if all are unusable will we return an error. ../rfc/5321:3851
191 mxl, err := resolver.LookupMX(ctx, effectiveDomain.ASCII+".")
192 cancel()
193 if err != nil && len(mxl) == 0 {
194 if !dns.IsNotFound(err) {
195 return nil, effectiveDomain, false, fmt.Errorf("%w: mx lookup for %s: %v", errDNS, effectiveDomain, err)
196 }
197
198 // No MX record. First attempt CNAME lookup. ../rfc/5321:3838 ../rfc/3974:197
199 ctx, cancel = context.WithTimeout(cidctx, 30*time.Second)
200 defer cancel()
201 cname, err := resolver.LookupCNAME(ctx, effectiveDomain.ASCII+".")
202 cancel()
203 if err != nil && !dns.IsNotFound(err) {
204 return nil, effectiveDomain, false, fmt.Errorf("%w: cname lookup for %s: %v", errDNS, effectiveDomain, err)
205 }
206 if err == nil && cname != effectiveDomain.ASCII+"." {
207 d, err := dns.ParseDomain(strings.TrimSuffix(cname, "."))
208 if err != nil {
209 return nil, effectiveDomain, true, fmt.Errorf("%w: parsing cname domain %s: %v", errDNS, effectiveDomain, err)
210 }
211 effectiveDomain = d
212 // Start again with new domain.
213 continue
214 }
215
216 // See if the host exists. If so, attempt delivery directly to host. ../rfc/5321:3842
217 ctx, cancel = context.WithTimeout(cidctx, 30*time.Second)
218 defer cancel()
219 _, err = resolver.LookupHost(ctx, effectiveDomain.ASCII+".")
220 cancel()
221 if dns.IsNotFound(err) {
222 return nil, effectiveDomain, true, fmt.Errorf("%w: recipient domain/host %s", errNoRecord, effectiveDomain)
223 } else if err != nil {
224 return nil, effectiveDomain, false, fmt.Errorf("%w: looking up host %s because of no mx record: %v", errDNS, effectiveDomain, err)
225 }
226 hosts = []dns.IPDomain{{Domain: effectiveDomain}}
227 } else if err != nil {
228 qlog.Infox("partial mx failure, attempting delivery to valid mx records", err)
229 }
230
231 // ../rfc/7505:122
232 if err == nil && len(mxl) == 1 && mxl[0].Host == "." {
233 return nil, effectiveDomain, true, errNoMail
234 }
235
236 // The Go resolver already sorts by preference, randomizing records of same
237 // preference. ../rfc/5321:3885
238 for _, mx := range mxl {
239 host, err := dns.ParseDomain(strings.TrimSuffix(mx.Host, "."))
240 if err != nil {
241 // note: should not happen because Go resolver already filters these out.
242 return nil, effectiveDomain, true, fmt.Errorf("%w: invalid host name in mx record %q: %v", errDNS, mx.Host, err)
243 }
244 hosts = append(hosts, dns.IPDomain{Domain: host})
245 }
246 if len(hosts) > 0 {
247 err = nil
248 }
249 return hosts, effectiveDomain, false, err
250 }
251}
252
253// deliverHost attempts to deliver m to host.
254// deliverHost updated m.DialedIPs, which must be saved in case of failure to deliver.
255func deliverHost(log *mlog.Log, resolver dns.Resolver, dialer contextDialer, cid int64, ourHostname dns.Domain, transportName string, host dns.IPDomain, m *Msg, tlsMode smtpclient.TLSMode) (permanent, badTLS bool, secodeOpt string, remoteIP net.IP, errmsg string, ok bool) {
256 // About attempting delivery to multiple addresses of a host: ../rfc/5321:3898
257
258 start := time.Now()
259 var deliveryResult string
260 defer func() {
261 metricDelivery.WithLabelValues(fmt.Sprintf("%d", m.Attempts), transportName, string(tlsMode), deliveryResult).Observe(float64(time.Since(start)) / float64(time.Second))
262 log.Debug("queue deliverhost result", mlog.Field("host", host), mlog.Field("attempt", m.Attempts), mlog.Field("tlsmode", tlsMode), mlog.Field("permanent", permanent), mlog.Field("badtls", badTLS), mlog.Field("secodeopt", secodeOpt), mlog.Field("errmsg", errmsg), mlog.Field("ok", ok), mlog.Field("duration", time.Since(start)))
263 }()
264
265 f, err := os.Open(m.MessagePath())
266 if err != nil {
267 return false, false, "", nil, fmt.Sprintf("open message file: %s", err), false
268 }
269 msgr := store.FileMsgReader(m.MsgPrefix, f)
270 defer func() {
271 err := msgr.Close()
272 log.Check(err, "closing message after delivery attempt")
273 }()
274
275 cidctx := context.WithValue(mox.Context, mlog.CidKey, cid)
276 ctx, cancel := context.WithTimeout(cidctx, 30*time.Second)
277 defer cancel()
278
279 conn, ip, dualstack, err := dialHost(ctx, log, resolver, dialer, host, 25, m)
280 remoteIP = ip
281 cancel()
282 var result string
283 switch {
284 case err == nil:
285 result = "ok"
286 case errors.Is(err, os.ErrDeadlineExceeded), errors.Is(err, context.DeadlineExceeded):
287 result = "timeout"
288 case errors.Is(err, context.Canceled):
289 result = "canceled"
290 default:
291 result = "error"
292 }
293 metricConnection.WithLabelValues(result).Inc()
294 if err != nil {
295 log.Debugx("connecting to remote smtp", err, mlog.Field("host", host))
296 return false, false, "", ip, fmt.Sprintf("dialing smtp server: %v", err), false
297 }
298
299 var mailFrom string
300 if m.SenderLocalpart != "" || !m.SenderDomain.IsZero() {
301 mailFrom = m.Sender().XString(m.SMTPUTF8)
302 }
303 rcptTo := m.Recipient().XString(m.SMTPUTF8)
304
305 // todo future: get closer to timeouts specified in rfc? ../rfc/5321:3610
306 log = log.Fields(mlog.Field("remoteip", ip))
307 ctx, cancel = context.WithTimeout(cidctx, 30*time.Minute)
308 defer cancel()
309 mox.Connections.Register(conn, "smtpclient", "queue")
310 sc, err := smtpclient.New(ctx, log, conn, tlsMode, ourHostname, host.Domain, nil)
311 defer func() {
312 if sc == nil {
313 conn.Close()
314 } else {
315 sc.Close()
316 }
317 mox.Connections.Unregister(conn)
318 }()
319 if err == nil {
320 has8bit := m.Has8bit
321 smtputf8 := m.SMTPUTF8
322 var msg io.Reader = msgr
323 size := m.Size
324 if m.DSNUTF8 != nil && sc.Supports8BITMIME() && sc.SupportsSMTPUTF8() {
325 has8bit = true
326 smtputf8 = true
327 size = int64(len(m.DSNUTF8))
328 msg = bytes.NewReader(m.DSNUTF8)
329 }
330 err = sc.Deliver(ctx, mailFrom, rcptTo, size, msg, has8bit, smtputf8)
331 }
332 if err != nil {
333 log.Infox("delivery failed", err)
334 }
335 var cerr smtpclient.Error
336 switch {
337 case err == nil:
338 deliveryResult = "ok"
339 case errors.Is(err, os.ErrDeadlineExceeded), errors.Is(err, context.DeadlineExceeded):
340 deliveryResult = "timeout"
341 case errors.Is(err, context.Canceled):
342 deliveryResult = "canceled"
343 case errors.As(err, &cerr):
344 deliveryResult = "temperror"
345 if cerr.Permanent {
346 deliveryResult = "permerror"
347 }
348 default:
349 deliveryResult = "error"
350 }
351 if err == nil {
352 return false, false, "", ip, "", true
353 } else if cerr, ok := err.(smtpclient.Error); ok {
354 // If we are being rejected due to policy reasons on the first
355 // attempt and remote has both IPv4 and IPv6, we'll give it
356 // another try. Our first IP may be in a block list, the address for
357 // the other family perhaps is not.
358 permanent := cerr.Permanent
359 if permanent && m.Attempts == 1 && dualstack && strings.HasPrefix(cerr.Secode, "7.") {
360 permanent = false
361 }
362 return permanent, errors.Is(cerr, smtpclient.ErrTLS), cerr.Secode, ip, cerr.Error(), false
363 } else {
364 return false, errors.Is(cerr, smtpclient.ErrTLS), "", ip, err.Error(), false
365 }
366}
367