1package smtpclient
2
3import (
4 "context"
5 "crypto/sha256"
6 "crypto/sha512"
7 "crypto/x509"
8 "errors"
9 "fmt"
10 "net"
11 "sort"
12 "strings"
13 "time"
14
15 "github.com/mjl-/adns"
16
17 "github.com/mjl-/mox/dns"
18 "github.com/mjl-/mox/mlog"
19)
20
// Sentinel errors used while gathering delivery destinations. Apart from
// errNoMail, which is returned as is, they are wrapped with %w together with
// details about the lookup that failed.
var (
	// errDNS marks a failed DNS lookup or an unusable name in a DNS response.
	errDNS = errors.New("dns lookup error")
	// errNoMail is returned for a domain that has a single "." MX record.
	errNoMail = errors.New("domain does not accept email as indicated with single dot for mx record")
	// errCNAMELoop is used when following CNAMEs leads to a name that was already seen.
	errCNAMELoop = errors.New("cname loop")
	// errCNAMELimit is used when a CNAME chain is longer than we are willing to follow.
	errCNAMELimit = errors.New("too many cname records")
)
27
28// GatherDestinations looks up the hosts to deliver email to a domain ("next-hop").
29// If it is an IP address, it is the only destination to try. Otherwise CNAMEs of
30// the domain are followed. Then MX records for the expanded CNAME are looked up.
31// If no MX record is present, the original domain is returned. If an MX record is
32// present but indicates the domain does not accept email, ErrNoMail is returned.
33// If valid MX records were found, the MX target hosts are returned.
34//
35// haveMX indicates if an MX record was found.
36//
37// origNextHopAuthentic indicates if the DNS record for the initial domain name was
38// DNSSEC secure (CNAME, MX).
39//
40// expandedNextHopAuthentic indicates if the DNS records after following CNAMEs were
41// DNSSEC secure.
42//
43// These authentic flags are used by DANE, to determine where to look up TLSA
44// records, and which names to allow in the remote TLS certificate. If MX records
45// were found, both the original and expanded next-hops must be authentic for DANE
46// to apply. For a non-IP with no MX records found, the authentic result can be
47// used to decide which of the names to use as TLSA base domain.
48func GatherDestinations(ctx context.Context, log *mlog.Log, resolver dns.Resolver, origNextHop dns.IPDomain) (haveMX, origNextHopAuthentic, expandedNextHopAuthentic bool, expandedNextHop dns.Domain, hosts []dns.IPDomain, permanent bool, err error) {
49 // ../rfc/5321:3824
50
51 // IP addresses are dialed directly, and don't have TLSA records.
52 if len(origNextHop.IP) > 0 {
53 return false, false, false, expandedNextHop, []dns.IPDomain{origNextHop}, false, nil
54 }
55
56 // We start out assuming the result is authentic. Updated with each lookup.
57 origNextHopAuthentic = true
58 expandedNextHopAuthentic = true
59
60 // We start out delivering to the recipient domain. We follow CNAMEs.
61 rcptDomain := origNextHop.Domain
62 // Domain we are actually delivering to, after following CNAME record(s).
63 expandedNextHop = rcptDomain
64 // Keep track of CNAMEs we have followed, to detect loops.
65 domainsSeen := map[string]bool{}
66 for i := 0; ; i++ {
67 if domainsSeen[expandedNextHop.ASCII] {
68 // todo: only mark as permanent failure if TTLs for all records are beyond latest possibly delivery retry we would do.
69 err := fmt.Errorf("%w: recipient domain %s: already saw %s", errCNAMELoop, rcptDomain, expandedNextHop)
70 return false, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, false, err
71 }
72 domainsSeen[expandedNextHop.ASCII] = true
73
74 // note: The Go resolver returns the requested name if the domain has no CNAME
75 // record but has a host record.
76 if i == 16 {
77 // We have a maximum number of CNAME records we follow. There is no hard limit for
78 // DNS, and you might think folks wouldn't configure CNAME chains at all, but for
79 // (non-mail) domains, CNAME chains of 10 records have been encountered according
80 // to the internet.
81 // todo: only mark as permanent failure if TTLs for all records are beyond latest possibly delivery retry we would do.
82 err := fmt.Errorf("%w: recipient domain %s, last resolved domain %s", errCNAMELimit, rcptDomain, expandedNextHop)
83 return false, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, false, err
84 }
85
86 // Do explicit CNAME lookup. Go's LookupMX also resolves CNAMEs, but we want to
87 // know the final name, and we're interested in learning if the first vs later
88 // results were DNSSEC-(in)secure.
89 // ../rfc/5321:3838 ../rfc/3974:197
90 cctx, ccancel := context.WithTimeout(ctx, 30*time.Second)
91 defer ccancel()
92 cname, cnameResult, err := resolver.LookupCNAME(cctx, expandedNextHop.ASCII+".")
93 ccancel()
94 if i == 0 {
95 origNextHopAuthentic = origNextHopAuthentic && cnameResult.Authentic
96 }
97 expandedNextHopAuthentic = expandedNextHopAuthentic && cnameResult.Authentic
98 if err != nil && !dns.IsNotFound(err) {
99 err = fmt.Errorf("%w: cname lookup for %s: %v", errDNS, expandedNextHop, err)
100 return false, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, false, err
101 }
102 if err == nil && cname != expandedNextHop.ASCII+"." {
103 d, err := dns.ParseDomain(strings.TrimSuffix(cname, "."))
104 if err != nil {
105 // todo: only mark as permanent failure if TTLs for all records are beyond latest possibly delivery retry we would do.
106 err = fmt.Errorf("%w: parsing cname domain %s: %v", errDNS, expandedNextHop, err)
107 return false, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, false, err
108 }
109 expandedNextHop = d
110 // Start again with new domain.
111 continue
112 }
113
114 // Not a CNAME, so lookup MX record.
115 mctx, mcancel := context.WithTimeout(ctx, 30*time.Second)
116 defer mcancel()
117 // Note: LookupMX can return an error and still return records: Invalid records are
118 // filtered out and an error returned. We must process any records that are valid.
119 // Only if all are unusable will we return an error. ../rfc/5321:3851
120 mxl, mxResult, err := resolver.LookupMX(mctx, expandedNextHop.ASCII+".")
121 mcancel()
122 if i == 0 {
123 origNextHopAuthentic = origNextHopAuthentic && mxResult.Authentic
124 }
125 expandedNextHopAuthentic = expandedNextHopAuthentic && mxResult.Authentic
126 if err != nil && len(mxl) == 0 {
127 if !dns.IsNotFound(err) {
128 err = fmt.Errorf("%w: mx lookup for %s: %v", errDNS, expandedNextHop, err)
129 return false, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, false, err
130 }
131
132 // No MX record, attempt delivery directly to host. ../rfc/5321:3842
133 hosts = []dns.IPDomain{{Domain: expandedNextHop}}
134 return false, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, hosts, false, nil
135 } else if err != nil {
136 log.Infox("mx record has some invalid records, keeping only the valid mx records", err)
137 }
138
139 // ../rfc/7505:122
140 if err == nil && len(mxl) == 1 && mxl[0].Host == "." {
141 // Note: Depending on MX record TTL, this record may be replaced with a more
142 // receptive MX record before our final delivery attempt. But it's clearly the
143 // explicit desire not to be bothered with email delivery attempts, so mark failure
144 // as permanent.
145 return true, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, true, errNoMail
146 }
147
148 // The Go resolver already sorts by preference, randomizing records of same
149 // preference. ../rfc/5321:3885
150 for _, mx := range mxl {
151 // Parsing lax (unless pedantic mode) for MX targets with underscores as seen in the wild.
152 host, err := dns.ParseDomainLax(strings.TrimSuffix(mx.Host, "."))
153 if err != nil {
154 // note: should not happen because Go resolver already filters these out.
155 err = fmt.Errorf("%w: invalid host name in mx record %q: %v", errDNS, mx.Host, err)
156 return true, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, true, err
157 }
158 hosts = append(hosts, dns.IPDomain{Domain: host})
159 }
160 if len(hosts) > 0 {
161 err = nil
162 }
163 return true, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, hosts, false, err
164 }
165}
166
167// GatherIPs looks up the IPs to try for connecting to host, with the IPs ordered
168// to take previous attempts into account. For use with DANE, the CNAME-expanded
169// name is returned, and whether the DNS responses were authentic.
170func GatherIPs(ctx context.Context, log *mlog.Log, resolver dns.Resolver, host dns.IPDomain, dialedIPs map[string][]net.IP) (authentic bool, expandedAuthentic bool, expandedHost dns.Domain, ips []net.IP, dualstack bool, rerr error) {
171 if len(host.IP) > 0 {
172 return false, false, dns.Domain{}, []net.IP{host.IP}, false, nil
173 }
174
175 authentic = true
176 expandedAuthentic = true
177
178 // The Go resolver automatically follows CNAMEs, which is not allowed for host
179 // names in MX records, but seems to be accepted and is documented for DANE SMTP
180 // behaviour. We resolve CNAMEs explicitly, so we can return the final name, which
181 // DANE needs. ../rfc/7671:246
182 // ../rfc/5321:3861 ../rfc/2181:661 ../rfc/7672:1382 ../rfc/7671:1030
183 name := host.Domain.ASCII + "."
184
185 for i := 0; ; i++ {
186 cname, result, err := resolver.LookupCNAME(ctx, name)
187 if i == 0 {
188 authentic = result.Authentic
189 }
190 expandedAuthentic = expandedAuthentic && result.Authentic
191 if dns.IsNotFound(err) {
192 break
193 } else if err != nil {
194 return authentic, expandedAuthentic, dns.Domain{}, nil, dualstack, err
195 } else if strings.TrimSuffix(cname, ".") == strings.TrimSuffix(name, ".") {
196 break
197 }
198 if i > 10 {
199 return authentic, expandedAuthentic, dns.Domain{}, nil, dualstack, fmt.Errorf("mx lookup: %w", errCNAMELimit)
200 }
201 name = strings.TrimSuffix(cname, ".") + "."
202 }
203
204 if name == host.Domain.ASCII+"." {
205 expandedHost = host.Domain
206 } else {
207 var err error
208 expandedHost, err = dns.ParseDomain(strings.TrimSuffix(name, "."))
209 if err != nil {
210 return authentic, expandedAuthentic, dns.Domain{}, nil, dualstack, fmt.Errorf("parsing cname-resolved domain: %w", err)
211 }
212 }
213
214 ipaddrs, result, err := resolver.LookupIPAddr(ctx, name)
215 authentic = authentic && result.Authentic
216 expandedAuthentic = expandedAuthentic && result.Authentic
217 if err != nil || len(ipaddrs) == 0 {
218 return authentic, expandedAuthentic, expandedHost, nil, false, fmt.Errorf("looking up %q: %w", name, err)
219 }
220 var have4, have6 bool
221 for _, ipaddr := range ipaddrs {
222 ips = append(ips, ipaddr.IP)
223 if ipaddr.IP.To4() == nil {
224 have6 = true
225 } else {
226 have4 = true
227 }
228 }
229 dualstack = have4 && have6
230 prevIPs := dialedIPs[host.String()]
231 if len(prevIPs) > 0 {
232 prevIP := prevIPs[len(prevIPs)-1]
233 prevIs4 := prevIP.To4() != nil
234 sameFamily := 0
235 for _, ip := range prevIPs {
236 is4 := ip.To4() != nil
237 if prevIs4 == is4 {
238 sameFamily++
239 }
240 }
241 preferPrev := sameFamily == 1
242 // We use stable sort so any preferred/randomized listing from DNS is kept intact.
243 sort.SliceStable(ips, func(i, j int) bool {
244 aIs4 := ips[i].To4() != nil
245 bIs4 := ips[j].To4() != nil
246 if aIs4 != bIs4 {
247 // Prefer "i" if it is not same address family.
248 return aIs4 != prevIs4
249 }
250 // Prefer "i" if it is the same as last and we should be preferring it.
251 return preferPrev && ips[i].Equal(prevIP)
252 })
253 log.Debug("ordered ips for dialing", mlog.Field("ips", ips))
254 }
255 return
256}
257
258// GatherTLSA looks up TLSA record for either expandedHost or host, and returns
259// records usable for DANE with SMTP, and host names to allow in DANE-TA
260// certificate name verification.
261//
262// If no records are found, this isn't necessarily an error. It can just indicate
263// the domain/host does not opt-in to DANE, and nil records and a nil error are
264// returned.
265//
266// Only usable records are returned. If any record was found, DANE is required and
267// this is indicated with daneRequired. If no usable records remain, the caller
268// must do TLS, but not verify the remote TLS certificate.
269//
270// Returned values are always meaningful, also when an error was returned.
271func GatherTLSA(ctx context.Context, log *mlog.Log, resolver dns.Resolver, host dns.Domain, expandedAuthentic bool, expandedHost dns.Domain) (daneRequired bool, daneRecords []adns.TLSA, tlsaBaseDomain dns.Domain, err error) {
272 // ../rfc/7672:912
273 // This function is only called when the lookup of host was authentic.
274
275 var l []adns.TLSA
276
277 tlsaBaseDomain = host
278 if host == expandedHost || !expandedAuthentic {
279 l, err = lookupTLSACNAME(ctx, log, resolver, 25, "tcp", host)
280 } else if expandedAuthentic {
281 // ../rfc/7672:934
282 tlsaBaseDomain = expandedHost
283 l, err = lookupTLSACNAME(ctx, log, resolver, 25, "tcp", expandedHost)
284 if err == nil && len(l) == 0 {
285 tlsaBaseDomain = host
286 l, err = lookupTLSACNAME(ctx, log, resolver, 25, "tcp", host)
287 }
288 }
289 if len(l) == 0 || err != nil {
290 daneRequired = err != nil
291 log.Debugx("gathering tlsa records failed", err, mlog.Field("danerequired", daneRequired), mlog.Field("basedomain", tlsaBaseDomain))
292 return daneRequired, nil, tlsaBaseDomain, err
293 }
294 daneRequired = len(l) > 0
295 l = filterUsableTLSARecords(log, l)
296 log.Debug("tlsa records exist", mlog.Field("danerequired", daneRequired), mlog.Field("records", l), mlog.Field("basedomain", tlsaBaseDomain))
297 return daneRequired, l, tlsaBaseDomain, err
298}
299
300// lookupTLSACNAME composes a TLSA domain name to lookup, follows CNAMEs and looks
301// up TLSA records. no TLSA records exist, a nil error is returned as it means
302// the host does not opt-in to DANE.
303func lookupTLSACNAME(ctx context.Context, log *mlog.Log, resolver dns.Resolver, port int, protocol string, host dns.Domain) (l []adns.TLSA, rerr error) {
304 name := fmt.Sprintf("_%d._%s.%s", port, protocol, host.ASCII+".")
305 for i := 0; ; i++ {
306 cname, result, err := resolver.LookupCNAME(ctx, name)
307 if dns.IsNotFound(err) {
308 if !result.Authentic {
309 log.Debugx("cname nxdomain result during tlsa lookup not authentic, not doing dane for host", err, mlog.Field("host", host), mlog.Field("name", name))
310 return nil, nil
311 }
312 break
313 } else if err != nil {
314 return nil, fmt.Errorf("looking up cname for tlsa candidate base domain: %w", err)
315 } else if !result.Authentic {
316 log.Debugx("cname result during tlsa lookup not authentic, not doing dane for host", err, mlog.Field("host", host), mlog.Field("name", name))
317 return nil, nil
318 }
319 if i == 10 {
320 return nil, fmt.Errorf("looking up cname for tlsa candidate base domain: %w", errCNAMELimit)
321 }
322 name = strings.TrimSuffix(cname, ".") + "."
323 }
324 var result adns.Result
325 var err error
326 l, result, err = resolver.LookupTLSA(ctx, 0, "", name)
327 if dns.IsNotFound(err) || err == nil && len(l) == 0 {
328 log.Debugx("no tlsa records for host, not doing dane", err, mlog.Field("host", host), mlog.Field("name", name), mlog.Field("authentic", result.Authentic))
329 return nil, nil
330 } else if err != nil {
331 return nil, fmt.Errorf("looking up tlsa records for tlsa candidate base domain: %w", err)
332 } else if !result.Authentic {
333 log.Debugx("tlsa lookup not authentic, not doing dane for host", err, mlog.Field("host", host), mlog.Field("name", name))
334 return nil, nil
335 }
336 return l, nil
337}
338
339func filterUsableTLSARecords(log *mlog.Log, l []adns.TLSA) []adns.TLSA {
340 // Gather "usable" records. ../rfc/7672:708
341 o := 0
342 for _, r := range l {
343 // A record is not usable when we don't recognize parameters. ../rfc/6698:649
344
345 switch r.Usage {
346 case adns.TLSAUsageDANETA, adns.TLSAUsageDANEEE:
347 default:
348 // We can regard PKIX-TA and PKIX-EE as "unusable" with SMTP DANE. ../rfc/7672:1304
349 continue
350 }
351 switch r.Selector {
352 case adns.TLSASelectorCert, adns.TLSASelectorSPKI:
353 default:
354 continue
355 }
356 switch r.MatchType {
357 case adns.TLSAMatchTypeFull:
358 if r.Selector == adns.TLSASelectorCert {
359 if _, err := x509.ParseCertificate(r.CertAssoc); err != nil {
360 log.Debugx("parsing certificate in dane tlsa record, ignoring", err)
361 continue
362 }
363 } else if r.Selector == adns.TLSASelectorSPKI {
364 if _, err := x509.ParsePKIXPublicKey(r.CertAssoc); err != nil {
365 log.Debugx("parsing certificate in dane tlsa record, ignoring", err)
366 continue
367 }
368 }
369 case adns.TLSAMatchTypeSHA256:
370 if len(r.CertAssoc) != sha256.Size {
371 log.Debug("dane tlsa record with wrong data size for sha2-256", mlog.Field("got", len(r.CertAssoc)), mlog.Field("expect", sha256.Size))
372 continue
373 }
374 case adns.TLSAMatchTypeSHA512:
375 if len(r.CertAssoc) != sha512.Size {
376 log.Debug("dane tlsa record with wrong data size for sha2-512", mlog.Field("got", len(r.CertAssoc)), mlog.Field("expect", sha512.Size))
377 continue
378 }
379 default:
380 continue
381 }
382
383 l[o] = r
384 o++
385 }
386 return l[:o]
387}
388
389// GatherTLSANames returns the allowed names in TLS certificates for verification
390// with PKIX-* or DANE-TA. The first name should be used for SNI.
391//
392// If there was no MX record, the next-hop domain parameters (i.e. the original
393// email destination host, and its CNAME-expanded host, that has MX records) are
394// ignored and only the base domain parameters are taken into account.
395func GatherTLSANames(haveMX, expandedNextHopAuthentic, expandedTLSABaseDomainAuthentic bool, origNextHop, expandedNextHop, origTLSABaseDomain, expandedTLSABaseDomain dns.Domain) []dns.Domain {
396 // Gather the names to check against TLS certificate. ../rfc/7672:1318
397 if !haveMX {
398 // ../rfc/7672:1336
399 if !expandedTLSABaseDomainAuthentic || origTLSABaseDomain == expandedTLSABaseDomain {
400 return []dns.Domain{origTLSABaseDomain}
401 }
402 return []dns.Domain{expandedTLSABaseDomain, origTLSABaseDomain}
403 } else if expandedNextHopAuthentic {
404 // ../rfc/7672:1326
405 var l []dns.Domain
406 if expandedTLSABaseDomainAuthentic {
407 l = []dns.Domain{expandedTLSABaseDomain}
408 }
409 if expandedTLSABaseDomain != origTLSABaseDomain {
410 l = append(l, origTLSABaseDomain)
411 }
412 l = append(l, origNextHop)
413 if origNextHop != expandedNextHop {
414 l = append(l, expandedNextHop)
415 }
416 return l
417 } else {
418 // We don't attempt DANE after insecure MX, but behaviour for it is specified.
419 // ../rfc/7672:1332
420 return []dns.Domain{origNextHop}
421 }
422}
423