1package smtpclient
2
3import (
4 "context"
5 "crypto/sha256"
6 "crypto/sha512"
7 "crypto/x509"
8 "errors"
9 "fmt"
10 "log/slog"
11 "net"
12 "sort"
13 "strings"
14 "time"
15
16 "github.com/mjl-/adns"
17
18 "github.com/mjl-/mox/dns"
19 "github.com/mjl-/mox/mlog"
20)
21
// Sentinel errors wrapped by the gathering functions in this file, so the kind
// of failure can be recognized with errors.Is.
var (
	// errCNAMELimit is used when more CNAME records would have to be followed
	// than the gathering functions are willing to follow.
	errCNAMELimit = errors.New("too many cname records")

	// errCNAMELoop is used when following CNAME records leads back to a name
	// that was already visited.
	errCNAMELoop = errors.New("cname loop")

	// errDNS is used for failing DNS lookups and for unparsable names in DNS
	// responses.
	errDNS = errors.New("dns lookup error")

	// errNoMail is used when a domain publishes a single "." MX record.
	errNoMail = errors.New("domain does not accept email as indicated with single dot for mx record")
)
28
// HostPref is a host for delivery, with preference for MX records.
//
// Values are constructed with unkeyed composite literals in this file, so the
// order of the fields must not change.
type HostPref struct {
	// Host is the delivery target: either an IP address or a domain name.
	Host dns.IPDomain
	// Pref is the preference from the MX record, or -1 when not an MX record.
	Pref int
}
34
// GatherDestinations looks up the hosts to deliver email to a domain ("next-hop").
// If it is an IP address, it is the only destination to try. Otherwise CNAMEs of
// the domain are followed. Then MX records for the expanded CNAME are looked up.
// If no MX record is present, the CNAME-expanded domain itself is returned as the
// only host, with preference -1. If an MX record is present but indicates the
// domain does not accept email, errNoMail is returned. If valid MX records were
// found, the MX target hosts are returned.
//
// haveMX indicates if an MX record was found.
//
// origNextHopAuthentic indicates if the DNS record for the initial domain name was
// DNSSEC secure (CNAME, MX).
//
// expandedNextHopAuthentic indicates if the DNS records after following CNAMEs were
// DNSSEC secure.
//
// These authentic results are needed for DANE, to determine where to look up TLSA
// records, and which names to allow in the remote TLS certificate. If MX records
// were found, both the original and expanded next-hops must be authentic for DANE
// to be an option. For a non-IP with no MX records found, the authentic result can
// be used to decide which of the names to use as TLSA base domain.
//
// permanent is set for the two failures this function treats as permanent: the
// explicit "no mail" MX record (errNoMail) and an MX target host name that cannot
// be parsed. All other errors are returned with permanent false. Errors wrap one
// of errCNAMELoop, errCNAMELimit or errDNS, or are errNoMail itself.
//
// Each CNAME and MX lookup is done with its own 30 second timeout derived from ctx.
func GatherDestinations(ctx context.Context, elog *slog.Logger, resolver dns.Resolver, origNextHop dns.IPDomain) (haveMX, origNextHopAuthentic, expandedNextHopAuthentic bool, expandedNextHop dns.Domain, hostPrefs []HostPref, permanent bool, err error) {
	// ../rfc/5321:3824

	log := mlog.New("smtpclient", elog)

	// IP addresses are dialed directly, and don't have TLSA records.
	// expandedNextHop is still the zero value here.
	if len(origNextHop.IP) > 0 {
		return false, false, false, expandedNextHop, []HostPref{{origNextHop, -1}}, false, nil
	}

	// We start out assuming the result is authentic. Updated with each lookup.
	origNextHopAuthentic = true
	expandedNextHopAuthentic = true

	// We start out delivering to the recipient domain. We follow CNAMEs.
	rcptDomain := origNextHop.Domain
	// Domain we are actually delivering to, after following CNAME record(s).
	expandedNextHop = rcptDomain
	// Keep track of CNAMEs we have followed, to detect loops.
	domainsSeen := map[string]bool{}
	// Each iteration handles one name. The loop only continues after a CNAME was
	// followed; every other path returns.
	for i := 0; ; i++ {
		if domainsSeen[expandedNextHop.ASCII] {
			// todo: only mark as permanent failure if TTLs for all records are beyond latest possibly delivery retry we would do.
			err := fmt.Errorf("%w: recipient domain %s: already saw %s", errCNAMELoop, rcptDomain, expandedNextHop)
			return false, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, false, err
		}
		domainsSeen[expandedNextHop.ASCII] = true

		// note: The Go resolver returns the requested name if the domain has no CNAME
		// record but has a host record.
		if i == 16 {
			// We have a maximum number of CNAME records we follow. There is no hard limit for
			// DNS, and you might think folks wouldn't configure CNAME chains at all, but for
			// (non-mail) domains, CNAME chains of 10 records have been encountered according
			// to the internet.
			// todo: only mark as permanent failure if TTLs for all records are beyond latest possibly delivery retry we would do.
			err := fmt.Errorf("%w: recipient domain %s, last resolved domain %s", errCNAMELimit, rcptDomain, expandedNextHop)
			return false, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, false, err
		}

		// Do explicit CNAME lookup. Go's LookupMX also resolves CNAMEs, but we want to
		// know the final name, and we're interested in learning if the first vs later
		// results were DNSSEC-(in)secure.
		// ../rfc/5321:3838 ../rfc/3974:197
		cctx, ccancel := context.WithTimeout(ctx, 30*time.Second)
		// The deferred cancel only runs when the function returns. The explicit
		// ccancel call right after the lookup releases the context for this iteration.
		defer ccancel()
		cname, cnameResult, err := resolver.LookupCNAME(cctx, expandedNextHop.ASCII+".")
		ccancel()
		// The authentic results are updated before looking at the error, so they are
		// also meaningful in the error returns below. Only the first lookup (i == 0)
		// counts towards the authenticity of the original next-hop.
		if i == 0 {
			origNextHopAuthentic = origNextHopAuthentic && cnameResult.Authentic
		}
		expandedNextHopAuthentic = expandedNextHopAuthentic && cnameResult.Authentic
		// A "not found" result is not a failure: we continue with the MX lookup.
		if err != nil && !dns.IsNotFound(err) {
			err = fmt.Errorf("%w: cname lookup for %s: %v", errDNS, expandedNextHop, err)
			return false, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, false, err
		}
		// Only a name different from the one we asked for is followed as a CNAME.
		if err == nil && cname != expandedNextHop.ASCII+"." {
			d, err := dns.ParseDomain(strings.TrimSuffix(cname, "."))
			if err != nil {
				// todo: only mark as permanent failure if TTLs for all records are beyond latest possibly delivery retry we would do.
				err = fmt.Errorf("%w: parsing cname domain %s: %v", errDNS, expandedNextHop, err)
				return false, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, false, err
			}
			expandedNextHop = d
			// Start again with new domain.
			continue
		}

		// Not a CNAME, so lookup MX record.
		mctx, mcancel := context.WithTimeout(ctx, 30*time.Second)
		// As for the CNAME lookup: cancelled explicitly right after the lookup, the
		// deferred call runs at function return.
		defer mcancel()
		// Note: LookupMX can return an error and still return records: Invalid records are
		// filtered out and an error returned. We must process any records that are valid.
		// Only if all are unusable will we return an error. ../rfc/5321:3851
		mxl, mxResult, err := resolver.LookupMX(mctx, expandedNextHop.ASCII+".")
		mcancel()
		if i == 0 {
			origNextHopAuthentic = origNextHopAuthentic && mxResult.Authentic
		}
		expandedNextHopAuthentic = expandedNextHopAuthentic && mxResult.Authentic
		if err != nil && len(mxl) == 0 {
			if !dns.IsNotFound(err) {
				err = fmt.Errorf("%w: mx lookup for %s: %v", errDNS, expandedNextHop, err)
				return false, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, false, err
			}

			// No MX record, attempt delivery directly to host. ../rfc/5321:3842
			hostPrefs = []HostPref{{dns.IPDomain{Domain: expandedNextHop}, -1}}
			return false, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, hostPrefs, false, nil
		} else if err != nil {
			// Error together with records: continue with the records we did get.
			log.Infox("mx record has some invalid records, keeping only the valid mx records", err)
		}

		// ../rfc/7505:122
		if err == nil && len(mxl) == 1 && mxl[0].Host == "." {
			// Note: Depending on MX record TTL, this record may be replaced with a more
			// receptive MX record before our final delivery attempt. But it's clearly the
			// explicit desire not to be bothered with email delivery attempts, so mark failure
			// as permanent.
			return true, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, true, errNoMail
		}

		// The Go resolver already sorts by preference, randomizing records of same
		// preference. ../rfc/5321:3885
		for _, mx := range mxl {
			// Parsing lax (unless pedantic mode) for MX targets with underscores as seen in the wild.
			// Note: err declared here shadows the err from the MX lookup, within this loop only.
			host, err := dns.ParseDomainLax(strings.TrimSuffix(mx.Host, "."))
			if err != nil {
				// note: should not happen because Go resolver already filters these out.
				err = fmt.Errorf("%w: invalid host name in mx record %q: %v", errDNS, mx.Host, err)
				return true, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, true, err
			}
			hostPrefs = append(hostPrefs, HostPref{dns.IPDomain{Domain: host}, int(mx.Pref)})
		}
		// err may still hold the error LookupMX returned along with valid records. It
		// is cleared once at least one host was gathered.
		if len(hostPrefs) > 0 {
			err = nil
		}
		return true, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, hostPrefs, false, err
	}
}
175
176// GatherIPs looks up the IPs to try for connecting to host, with the IPs ordered
177// to take previous attempts into account. For use with DANE, the CNAME-expanded
178// name is returned, and whether the DNS responses were authentic.
179func GatherIPs(ctx context.Context, elog *slog.Logger, resolver dns.Resolver, network string, host dns.IPDomain, dialedIPs map[string][]net.IP) (authentic bool, expandedAuthentic bool, expandedHost dns.Domain, ips []net.IP, dualstack bool, rerr error) {
180 log := mlog.New("smtpclient", elog)
181
182 if len(host.IP) > 0 {
183 return false, false, dns.Domain{}, []net.IP{host.IP}, false, nil
184 }
185
186 authentic = true
187 expandedAuthentic = true
188
189 // The Go resolver automatically follows CNAMEs, which is not allowed for host
190 // names in MX records, but seems to be accepted and is documented for DANE SMTP
191 // behaviour. We resolve CNAMEs explicitly, so we can return the final name, which
192 // DANE needs. ../rfc/7671:246
193 // ../rfc/5321:3861 ../rfc/2181:661 ../rfc/7672:1382 ../rfc/7671:1030
194 name := host.Domain.ASCII + "."
195
196 for i := 0; ; i++ {
197 cname, result, err := resolver.LookupCNAME(ctx, name)
198 if i == 0 {
199 authentic = result.Authentic
200 }
201 expandedAuthentic = expandedAuthentic && result.Authentic
202 if dns.IsNotFound(err) {
203 break
204 } else if err != nil {
205 return authentic, expandedAuthentic, dns.Domain{}, nil, dualstack, err
206 } else if strings.TrimSuffix(cname, ".") == strings.TrimSuffix(name, ".") {
207 break
208 }
209 if i > 10 {
210 return authentic, expandedAuthentic, dns.Domain{}, nil, dualstack, fmt.Errorf("mx lookup: %w", errCNAMELimit)
211 }
212 name = strings.TrimSuffix(cname, ".") + "."
213 }
214
215 if name == host.Domain.ASCII+"." {
216 expandedHost = host.Domain
217 } else {
218 var err error
219 expandedHost, err = dns.ParseDomain(strings.TrimSuffix(name, "."))
220 if err != nil {
221 return authentic, expandedAuthentic, dns.Domain{}, nil, dualstack, fmt.Errorf("parsing cname-resolved domain: %w", err)
222 }
223 }
224
225 ipaddrs, result, err := resolver.LookupIP(ctx, network, name)
226 authentic = authentic && result.Authentic
227 expandedAuthentic = expandedAuthentic && result.Authentic
228 if err != nil || len(ipaddrs) == 0 {
229 return authentic, expandedAuthentic, expandedHost, nil, false, fmt.Errorf("looking up %q: %w", name, err)
230 }
231 var have4, have6 bool
232 for _, ipaddr := range ipaddrs {
233 ips = append(ips, ipaddr)
234 if ipaddr.To4() == nil {
235 have6 = true
236 } else {
237 have4 = true
238 }
239 }
240 dualstack = have4 && have6
241 prevIPs := dialedIPs[host.String()]
242 if len(prevIPs) > 0 {
243 prevIP := prevIPs[len(prevIPs)-1]
244 prevIs4 := prevIP.To4() != nil
245 sameFamily := 0
246 for _, ip := range prevIPs {
247 is4 := ip.To4() != nil
248 if prevIs4 == is4 {
249 sameFamily++
250 }
251 }
252 preferPrev := sameFamily == 1
253 // We use stable sort so any preferred/randomized listing from DNS is kept intact.
254 sort.SliceStable(ips, func(i, j int) bool {
255 aIs4 := ips[i].To4() != nil
256 bIs4 := ips[j].To4() != nil
257 if aIs4 != bIs4 {
258 // Prefer "i" if it is not same address family.
259 return aIs4 != prevIs4
260 }
261 // Prefer "i" if it is the same as last and we should be preferring it.
262 return preferPrev && ips[i].Equal(prevIP)
263 })
264 log.Debug("ordered ips for dialing", slog.Any("ips", ips))
265 }
266 return
267}
268
269// GatherTLSA looks up TLSA record for either expandedHost or host, and returns
270// records usable for DANE with SMTP, and host names to allow in DANE-TA
271// certificate name verification.
272//
273// If no records are found, this isn't necessarily an error. It can just indicate
274// the domain/host does not opt-in to DANE, and nil records and a nil error are
275// returned.
276//
277// Only usable records are returned. If any record was found, DANE is required and
278// this is indicated with daneRequired. If no usable records remain, the caller
279// must do TLS, but not verify the remote TLS certificate.
280//
281// Returned values are always meaningful, also when an error was returned.
282func GatherTLSA(ctx context.Context, elog *slog.Logger, resolver dns.Resolver, host dns.Domain, expandedAuthentic bool, expandedHost dns.Domain) (daneRequired bool, daneRecords []adns.TLSA, tlsaBaseDomain dns.Domain, err error) {
283 log := mlog.New("smtpclient", elog)
284
285 // ../rfc/7672:912
286 // This function is only called when the lookup of host was authentic.
287
288 var l []adns.TLSA
289
290 tlsaBaseDomain = host
291 if host == expandedHost || !expandedAuthentic {
292 l, err = lookupTLSACNAME(ctx, log, resolver, 25, "tcp", host)
293 } else if expandedAuthentic {
294 // ../rfc/7672:934
295 tlsaBaseDomain = expandedHost
296 l, err = lookupTLSACNAME(ctx, log, resolver, 25, "tcp", expandedHost)
297 if err == nil && len(l) == 0 {
298 tlsaBaseDomain = host
299 l, err = lookupTLSACNAME(ctx, log, resolver, 25, "tcp", host)
300 }
301 }
302 if len(l) == 0 || err != nil {
303 daneRequired = err != nil
304 log.Debugx("gathering tlsa records failed", err, slog.Bool("danerequired", daneRequired), slog.Any("basedomain", tlsaBaseDomain))
305 return daneRequired, nil, tlsaBaseDomain, err
306 }
307 daneRequired = len(l) > 0
308 l = filterUsableTLSARecords(log, l)
309 log.Debug("tlsa records exist",
310 slog.Bool("danerequired", daneRequired),
311 slog.Any("records", l),
312 slog.Any("basedomain", tlsaBaseDomain))
313 return daneRequired, l, tlsaBaseDomain, err
314}
315
316// lookupTLSACNAME composes a TLSA domain name to lookup, follows CNAMEs and looks
317// up TLSA records. no TLSA records exist, a nil error is returned as it means
318// the host does not opt-in to DANE.
319func lookupTLSACNAME(ctx context.Context, log mlog.Log, resolver dns.Resolver, port int, protocol string, host dns.Domain) (l []adns.TLSA, rerr error) {
320 name := fmt.Sprintf("_%d._%s.%s", port, protocol, host.ASCII+".")
321 for i := 0; ; i++ {
322 cname, result, err := resolver.LookupCNAME(ctx, name)
323 if dns.IsNotFound(err) {
324 if !result.Authentic {
325 log.Debugx("cname nxdomain result during tlsa lookup not authentic, not doing dane for host", err, slog.Any("host", host), slog.String("name", name))
326 return nil, nil
327 }
328 break
329 } else if err != nil {
330 return nil, fmt.Errorf("looking up cname for tlsa candidate base domain: %w", err)
331 } else if !result.Authentic {
332 log.Debugx("cname result during tlsa lookup not authentic, not doing dane for host", err, slog.Any("host", host), slog.String("name", name))
333 return nil, nil
334 }
335 if i == 10 {
336 return nil, fmt.Errorf("looking up cname for tlsa candidate base domain: %w", errCNAMELimit)
337 }
338 name = strings.TrimSuffix(cname, ".") + "."
339 }
340 var result adns.Result
341 var err error
342 l, result, err = resolver.LookupTLSA(ctx, 0, "", name)
343 if dns.IsNotFound(err) || err == nil && len(l) == 0 {
344 log.Debugx("no tlsa records for host, not doing dane", err,
345 slog.Any("host", host),
346 slog.String("name", name),
347 slog.Bool("authentic", result.Authentic))
348 return nil, nil
349 } else if err != nil {
350 return nil, fmt.Errorf("looking up tlsa records for tlsa candidate base domain: %w", err)
351 } else if !result.Authentic {
352 log.Debugx("tlsa lookup not authentic, not doing dane for host", err, slog.Any("host", host), slog.String("name", name))
353 return nil, nil
354 }
355 return l, nil
356}
357
358func filterUsableTLSARecords(log mlog.Log, l []adns.TLSA) []adns.TLSA {
359 // Gather "usable" records. ../rfc/7672:708
360 o := 0
361 for _, r := range l {
362 // A record is not usable when we don't recognize parameters. ../rfc/6698:649
363
364 switch r.Usage {
365 case adns.TLSAUsageDANETA, adns.TLSAUsageDANEEE:
366 default:
367 // We can regard PKIX-TA and PKIX-EE as "unusable" with SMTP DANE. ../rfc/7672:1304
368 continue
369 }
370 switch r.Selector {
371 case adns.TLSASelectorCert, adns.TLSASelectorSPKI:
372 default:
373 continue
374 }
375 switch r.MatchType {
376 case adns.TLSAMatchTypeFull:
377 if r.Selector == adns.TLSASelectorCert {
378 if _, err := x509.ParseCertificate(r.CertAssoc); err != nil {
379 log.Debugx("parsing certificate in dane tlsa record, ignoring", err)
380 continue
381 }
382 } else if r.Selector == adns.TLSASelectorSPKI {
383 if _, err := x509.ParsePKIXPublicKey(r.CertAssoc); err != nil {
384 log.Debugx("parsing certificate in dane tlsa record, ignoring", err)
385 continue
386 }
387 }
388 case adns.TLSAMatchTypeSHA256:
389 if len(r.CertAssoc) != sha256.Size {
390 log.Debug("dane tlsa record with wrong data size for sha2-256", slog.Int("got", len(r.CertAssoc)), slog.Int("expect", sha256.Size))
391 continue
392 }
393 case adns.TLSAMatchTypeSHA512:
394 if len(r.CertAssoc) != sha512.Size {
395 log.Debug("dane tlsa record with wrong data size for sha2-512", slog.Int("got", len(r.CertAssoc)), slog.Int("expect", sha512.Size))
396 continue
397 }
398 default:
399 continue
400 }
401
402 l[o] = r
403 o++
404 }
405 return l[:o]
406}
407
408// GatherTLSANames returns the allowed names in TLS certificates for verification
409// with PKIX-* or DANE-TA. The first name should be used for SNI.
410//
411// If there was no MX record, the next-hop domain parameters (i.e. the original
412// email destination host, and its CNAME-expanded host, that has MX records) are
413// ignored and only the base domain parameters are taken into account.
414func GatherTLSANames(haveMX, expandedNextHopAuthentic, expandedTLSABaseDomainAuthentic bool, origNextHop, expandedNextHop, origTLSABaseDomain, expandedTLSABaseDomain dns.Domain) []dns.Domain {
415 // Gather the names to check against TLS certificate. ../rfc/7672:1318
416 if !haveMX {
417 // ../rfc/7672:1336
418 if !expandedTLSABaseDomainAuthentic || origTLSABaseDomain == expandedTLSABaseDomain {
419 return []dns.Domain{origTLSABaseDomain}
420 }
421 return []dns.Domain{expandedTLSABaseDomain, origTLSABaseDomain}
422 } else if expandedNextHopAuthentic {
423 // ../rfc/7672:1326
424 var l []dns.Domain
425 if expandedTLSABaseDomainAuthentic {
426 l = []dns.Domain{expandedTLSABaseDomain}
427 }
428 if expandedTLSABaseDomain != origTLSABaseDomain {
429 l = append(l, origTLSABaseDomain)
430 }
431 l = append(l, origNextHop)
432 if origNextHop != expandedNextHop {
433 l = append(l, expandedNextHop)
434 }
435 return l
436 } else {
437 // We don't attempt DANE after insecure MX, but behaviour for it is specified.
438 // ../rfc/7672:1332
439 return []dns.Domain{origNextHop}
440 }
441}
442