1// Package tlsrptsend sends TLS reports based on success/failure statistics and
2// details gathered while making SMTP STARTTLS connections for delivery. See RFC
6// tlsrptsend is a separate package instead of being in tlsrptdb because it imports
7// queue and queue imports tlsrptdb to store tls results, so that would cause a
10// Sending TLS reports and DMARC reports is very similar. See ../dmarcdb/eval.go:/similar and ../tlsrptsend/send.go:/similar.
12// todo spec: ../rfc/8460:441 ../rfc/8460:463 may lead reader to believe they can find a DANE or MTA-STS policy at the same place, while in practice you'll get an MTA-STS policy at a recipient domain and a DANE policy at a mail host, and that's where the TLSRPT policy is defined. it would have helped with this implementation if the distinction was mentioned explicitly, also earlier in the document (i realized it late in the implementation process based on the terminology entry for the policy domain). examples with a tlsrpt record at a mail host would have helped too.
13// todo spec: ../rfc/8460:1017 example report message misses the required DKIM signature.
32 "golang.org/x/exp/slices"
34 "github.com/prometheus/client_golang/prometheus"
35 "github.com/prometheus/client_golang/prometheus/promauto"
37 "github.com/mjl-/bstore"
39 "github.com/mjl-/mox/config"
40 "github.com/mjl-/mox/dkim"
41 "github.com/mjl-/mox/dns"
42 "github.com/mjl-/mox/message"
43 "github.com/mjl-/mox/metrics"
44 "github.com/mjl-/mox/mlog"
45 "github.com/mjl-/mox/mox-"
46 "github.com/mjl-/mox/moxio"
47 "github.com/mjl-/mox/moxvar"
48 "github.com/mjl-/mox/queue"
49 "github.com/mjl-/mox/smtp"
50 "github.com/mjl-/mox/store"
51 "github.com/mjl-/mox/tlsrpt"
52 "github.com/mjl-/mox/tlsrptdb"
56 metricReport = promauto.NewCounter(
57 prometheus.CounterOpts{
58 Name: "mox_tlsrptsend_report_queued_total",
59 Help: "Total messages with TLS reports queued.",
62 metricReportError = promauto.NewCounter(
63 prometheus.CounterOpts{
64 Name: "mox_tlsrptsend_report_error_total",
65 Help: "Total errors while composing or queueing TLS reports.",
70var jitterRand = mox.NewPseudoRand()
72// time to sleep until sending reports at midnight t, replaced by tests.
73// Jitter so we don't cause load at exactly midnight, other processes may
74// already be doing that.
75var jitteredTimeUntil = func(t time.Time) time.Duration {
76 return time.Until(t.Add(time.Duration(240+jitterRand.Intn(120)) * time.Second))
79// Start launches a goroutine that wakes up just after 00:00 UTC to send TLSRPT
80// reports. Reports are sent spread out over a 4 hour period.
81func Start(resolver dns.Resolver) {
83 log := mlog.New("tlsrptsend")
86 // In case of panic don't take the whole program down.
89 log.Error("recover from panic", mlog.Field("panic", x))
91 metrics.PanicInc(metrics.Tlsrptdb)
95 timer := time.NewTimer(time.Hour) // Reset below.
100 db := tlsrptdb.ResultDB
102 log.Error("no tlsrpt results database for tls reports, not sending reports")
106 // We start sending for previous day, if there are any reports left.
107 endUTC := midnightUTC(time.Now())
110 dayUTC := endUTC.Add(-12 * time.Hour).Format("20060102")
112 // Remove evaluations older than 48 hours (2 reports with 24 hour interval)
113 // They should have been processed by now. We may have kept them
114 // during temporary errors, but persistent temporary errors shouldn't fill up our
115 // database and we don't want to send old reports either.
116 _, err := bstore.QueryDB[tlsrptdb.TLSResult](ctx, db).FilterLess("DayUTC", endUTC.Add((-48-12)*time.Hour).Format("20060102")).Delete()
117 log.Check(err, "removing stale tls results from database")
119 clog := log.WithCid(mox.Cid())
120 clog.Info("sending tls reports", mlog.Field("day", dayUTC))
121 if err := sendReports(ctx, clog, resolver, db, dayUTC, endUTC); err != nil {
122 clog.Errorx("sending tls reports", err)
123 metricReportError.Inc()
125 clog.Info("finished sending tls reports")
128 endUTC = endUTC.Add(24 * time.Hour)
129 timer.Reset(jitteredTimeUntil(endUTC))
133 log.Info("tls report sender shutting down")
141func midnightUTC(now time.Time) time.Time {
143 return time.Date(t.Year(), t.Month(), t.Day(), 0, 0, 0, 0, t.Location())
146// Sleep in between sending two reports.
148var sleepBetween = func(ctx context.Context, between time.Duration) (ok bool) {
149 t := time.NewTimer(between)
159// sendReports gathers all policy domains that have results that should receive a
160// TLS report and sends a report to each if their TLSRPT DNS record has reporting
162func sendReports(ctx context.Context, log *mlog.Log, resolver dns.Resolver, db *bstore.DB, dayUTC string, endTimeUTC time.Time) error {
168 // Gather all policy domains we plan to send to.
169 rcptDoms := map[key]bool{} // Results where recipient domain is equal to policy domain, regardless of IsHost.
170 nonRcptDoms := map[key]bool{} // MX domains (without those that are also recipient domains).
172 q := bstore.QueryDB[tlsrptdb.TLSResult](ctx, db)
173 q.FilterLessEqual("DayUTC", dayUTC)
174 err := q.ForEach(func(e tlsrptdb.TLSResult) error {
176 if e.PolicyDomain != e.RecipientDomain {
179 k := key{e.PolicyDomain, e.DayUTC}
180 if e.SendReport && !doms[k] {
183 doms[k] = doms[k] || e.SendReport
187 return fmt.Errorf("looking for domains to send tls reports to: %v", err)
190 // Stretch sending reports over max 4 hours, but only if there are quite a few
192 between := 4 * time.Hour
194 between = between / time.Duration(nsend)
196 if between > 5*time.Minute {
197 between = 5 * time.Minute
200 var wg sync.WaitGroup
204 remove := map[key]struct{}{}
205 var removeMutex sync.Mutex
207 sendDomains := func(isRcptDom bool, doms map[key]bool) {
208 for k, send := range doms {
211 remove[k] = struct{}{}
217 ok := sleepBetween(ctx, between)
224 // In goroutine, so our timing stays independent of how fast we process.
228 // In case of panic don't take the whole program down.
231 log.Error("unhandled panic in tlsrptsend sendReports", mlog.Field("panic", x))
233 metrics.PanicInc(metrics.Tlsrptdb)
238 rlog := log.WithCid(mox.Cid()).Fields(mlog.Field("policydomain", k.policyDomain), mlog.Field("daytutc", k.dayUTC), mlog.Field("isrcptdom", isRcptDom))
239 rlog.Info("looking to send tls report for domain")
240 cleanup, err := sendReportDomain(ctx, rlog, resolver, db, endTimeUTC, isRcptDom, k.policyDomain, k.dayUTC)
242 rlog.Errorx("sending tls report to domain", err)
243 metricReportError.Inc()
247 defer removeMutex.Unlock()
248 remove[k] = struct{}{}
254 // We send to recipient domains first. That will store the reporting addresses for
255 // the recipient domains, which are used when sending to nonRcptDoms to potentially
256 // skip sending a duplicate report.
257 sendDomains(true, rcptDoms)
259 sendDomains(false, nonRcptDoms)
262 // Remove all records that have been processed.
263 err = db.Write(ctx, func(tx *bstore.Tx) error {
264 for k := range remove {
265 q := bstore.QueryTx[tlsrptdb.TLSResult](tx)
266 q.FilterNonzero(tlsrptdb.TLSResult{PolicyDomain: k.policyDomain, DayUTC: k.dayUTC})
274 log.Check(err, "cleaning up tls results in database")
279// replaceable for testing.
280var queueAdd = queue.Add
282func sendReportDomain(ctx context.Context, log *mlog.Log, resolver dns.Resolver, db *bstore.DB, endUTC time.Time, isRcptDom bool, policyDomain, dayUTC string) (cleanup bool, rerr error) {
283 polDom, err := dns.ParseDomain(policyDomain)
285 return false, fmt.Errorf("parsing policy domain for sending tls reports: %v", err)
288 // Reports need to be DKIM-signed by the submitter domain. Lookup the DKIM
289 // configuration now. If we don't have any, there is no point sending reports.
290 // todo spec: ../rfc/8460:322 "reporting domain" is a bit ambiguous. submitter domain is used in other places. it may be helpful in practice to allow dmarc-relaxed-like matching of the signing domain, so an address postmaster at mail host can send the reports using dkim keys at a higher-up domain (e.g. the publicsuffix domain).
291 fromDom := mox.Conf.Static.HostnameDomain
292 var confDKIM config.DKIM
294 confDom, ok := mox.Conf.Domain(fromDom)
295 if len(confDom.DKIM.Sign) > 0 {
296 confDKIM = confDom.DKIM
299 return true, fmt.Errorf("domain for mail host does not have dkim signing configured, report message cannot be dkim-signed")
302 // Remove least significant label.
304 _, nfd.ASCII, _ = strings.Cut(fromDom.ASCII, ".")
305 _, nfd.Unicode, _ = strings.Cut(fromDom.Unicode, ".")
308 var zerodom dns.Domain
309 if fromDom == zerodom {
310 return true, fmt.Errorf("no configured domain for mail host found, report message cannot be dkim-signed")
314 // We'll cleanup records by default.
316 // But if we encounter a temporary error we cancel cleanup of evaluations on error.
320 if !cleanup || tempError {
322 log.Debug("not cleaning up results after attempting to send tls report")
326 // Get TLSRPT record. If there are no reporting addresses, we're not going to send at all.
327 record, _, err := tlsrpt.Lookup(ctx, resolver, polDom)
329 // If there is no TLSRPT record, that's fine, we'll remove what we tracked.
330 if errors.Is(err, tlsrpt.ErrNoRecord) {
333 cleanup = errors.Is(err, tlsrpt.ErrDNS)
334 return cleanup, fmt.Errorf("looking up current tlsrpt record for reporting addresses: %v", err)
337 var recipients []message.NameAddress
338 var recipientStrs []string
340 for _, l := range record.RUAs {
341 for _, s := range l {
342 u, err := url.Parse(string(s))
344 log.Debugx("parsing rua uri in tlsrpt dns record, ignoring", err, mlog.Field("rua", s))
348 if u.Scheme == "mailto" {
349 addr, err := smtp.ParseAddress(u.Opaque)
351 log.Debugx("parsing mailto uri in tlsrpt record rua value, ignoring", err, mlog.Field("rua", s))
354 recipients = append(recipients, message.NameAddress{Address: addr})
355 recipientStrs = append(recipientStrs, string(s))
356 } else if u.Scheme == "https" {
357 // Although "report" is ambiguous and could mean both only the JSON data or an
358 // entire message (including DKIM-Signature) with the JSON data, it appears the
359 // intention of the RFC is that the HTTPS transport sends only the JSON data, given
360 // mention of the media type to use (for the HTTP POST). It is the type of the
361 // report, not of a message. TLS reports sent over email must have a DKIM
362 // signature, i.e. must be authenticated, for understandable reasons. No such
363 // requirement is specified for HTTPS, but no one is going to accept
364 // unauthenticated TLS reports over HTTPS. So there seems little point in sending
367 // todo spec: would be good to have clearer distinction between "report" (JSON) and "report message" (message with report attachment, that can be DKIM signed). propose sending report message over https that includes DKIM signature so authenticity can be verified and the report used. ../rfc/8460:310
368 log.Debug("https scheme in rua uri in tlsrpt record, ignoring since they will likey not be used to due lack of authentication", mlog.Field("rua", s))
370 log.Debug("unknown scheme in rua uri in tlsrpt record, ignoring", mlog.Field("rua", s))
375 if len(recipients) == 0 {
376 // No reports requested, perfectly fine, no work to do for us.
377 log.Debug("no tlsrpt reporting addresses configured")
381 q := bstore.QueryDB[tlsrptdb.TLSResult](ctx, db)
383 q.FilterNonzero(tlsrptdb.TLSResult{RecipientDomain: policyDomain, DayUTC: dayUTC})
385 q.FilterNonzero(tlsrptdb.TLSResult{PolicyDomain: policyDomain, DayUTC: dayUTC})
387 tlsResults, err := q.List()
389 return true, fmt.Errorf("get tls results from database: %v", err)
392 if len(tlsResults) == 0 {
393 // Should not happen. But no point in sending messages with empty reports.
394 return true, fmt.Errorf("no tls results found")
397 // Stop if we already sent a report for this destination.
398 for _, r := range tlsResults {
399 if r.PolicyDomain == r.RecipientDomain && (isRcptDom && r.SentToRecipientDomain || !isRcptDom && r.SentToPolicyDomain) {
404 beginUTC := endUTC.Add(-24 * time.Hour)
406 report := tlsrpt.Report{
407 OrganizationName: fromDom.ASCII,
408 DateRange: tlsrpt.TLSRPTDateRange{
412 ContactInfo: "postmaster@" + fromDom.ASCII,
413 // todo spec: ../rfc/8460:968 ../rfc/8460:1772 ../rfc/8460:691 subject header assumes a report-id in the form of a msg-id, but example and report-id json field explanation allows free-form report-id's (assuming we're talking about the same report-id here).
414 ReportID: endUTC.Add(-12*time.Hour).Format("20060102") + "." + polDom.ASCII + "@" + fromDom.ASCII,
417 rcptDomAddresses := map[string][]string{}
418 for _, tlsResult := range tlsResults {
419 rcptDomAddresses[tlsResult.RecipientDomain] = tlsResult.RecipientDomainReportingAddresses
422 // Merge all results into this report.
423 // If we are sending to a recipient domain, we include all relevant policy domains,
424 // so possibly multiple MX hosts (with DANE policies). That means we may be sending
425 // multiple "no-policy-found" results (1 for sts and 0 or more for mx hosts). An
426 // explicit no-sts or no-tlsa would make these less ambiguous, but the
427 // policy-domain's will make clear which is the MX and which is the recipient
428 // domain. Only for recipient domains with an MX target equal to the recipient host
429 // could it be confusing.
430 // If we are sending to MX targets (that aren't recipient domains), we mention the
431 // affected recipient domains as policy-domain while keeping the original policy
432 // domain (MX target) in the "mx-host" field. This behaviour isn't in the RFC, but
433 // seems useful to give MX operators insight into the recipient domains affected.
434 // We also won't include results for a recipient domain if its TLSRPT policy has
435 // the same reporting addresses as the MX target TLSRPT policy.
436 for i, tlsResult := range tlsResults {
438 if slices.Equal(rcptDomAddresses[tlsResult.RecipientDomain], recipientStrs) {
441 for j, r := range tlsResult.Results {
442 if tlsResult.IsHost {
443 tlsResults[i].Results[j].Policy.MXHost = []string{r.Policy.Domain}
445 tlsResults[i].Results[j].Policy.Domain = tlsResult.RecipientDomain
449 report.Merge(tlsResult.Results...)
452 // We may not have any results left, i.e. when this is an MX target and we already
453 // sent all results in the report to the recipient domain with identical reporting
455 if len(report.Policies) == 0 {
459 if !mox.Conf.Static.OutgoingTLSReportsForAllSuccess {
461 // Check there is at least one failure. If not, we don't send a report.
462 for _, r := range report.Policies {
463 if r.Summary.TotalFailureSessionCount > 0 || len(r.FailureDetails) > 0 {
473 log.Info("sending tls report")
475 reportFile, err := store.CreateMessageTemp("tlsreportout")
477 return false, fmt.Errorf("creating temporary file for outgoing tls report: %v", err)
479 defer store.CloseRemoveTempFile(log, reportFile, "generated tls report")
482 gzw := gzip.NewWriter(reportFile)
483 enc := json.NewEncoder(gzw)
484 enc.SetIndent("", "\t")
486 err = enc.Encode(report)
492 return false, fmt.Errorf("writing tls report as json with gzip: %v", err)
495 msgf, err := store.CreateMessageTemp("tlsreportmsgout")
497 return false, fmt.Errorf("creating temporary message file with outgoing tls report: %v", err)
499 defer store.CloseRemoveTempFile(log, msgf, "message with generated tls report")
501 // We are sending reports from our host's postmaster address. In a
502 // typical setup the host is a subdomain of a configured domain with
503 // DKIM keys, so we can DKIM-sign our reports. SPF should pass anyway.
504 // todo future: when sending, use an SMTP MAIL FROM that we can relate back to recipient reporting address so we can stop trying to send reports in case of repeated delivery failure DSNs.
505 from := smtp.Address{Localpart: "postmaster", Domain: fromDom}
508 subject := fmt.Sprintf("Report Domain: %s Submitter: %s Report-ID: <%s>", polDom.ASCII, fromDom, report.ReportID)
511 text := fmt.Sprintf(`Attached is a TLS report with a summary of connection successes and failures
512during attempts to securely deliver messages to your mail server, including
513details about errors encountered. You are receiving this message because your
514address is specified in the "rua" field of the TLSRPT record for your
521`, polDom, fromDom, report.ReportID, beginUTC.Format(time.DateTime), endUTC.Format(time.DateTime))
524 reportFilename := fmt.Sprintf("%s!%s!%d!%d.json.gz", fromDom.ASCII, polDom.ASCII, beginUTC.Unix(), endUTC.Add(-time.Second).Unix())
526 // Compose the message.
527 msgPrefix, has8bit, smtputf8, messageID, err := composeMessage(ctx, log, msgf, polDom, confDKIM, from, recipients, subject, text, reportFilename, reportFile)
529 return false, fmt.Errorf("composing message with outgoing tls report: %v", err)
531 msgInfo, err := msgf.Stat()
533 return false, fmt.Errorf("stat message with outgoing tls report: %v", err)
535 msgSize := int64(len(msgPrefix)) + msgInfo.Size()
537 // Already mark the report as sent. If it won't succeed below, it probably won't
538 // succeed on a later retry either. And if we would fail to mark a report as sent
539 // after sending it, we may send duplicates or even get in some kind of sending
541 err = db.Write(ctx, func(tx *bstore.Tx) error {
543 q := bstore.QueryTx[tlsrptdb.TLSResult](tx)
544 q.FilterNonzero(tlsrptdb.TLSResult{DayUTC: dayUTC, RecipientDomain: policyDomain})
545 _, err := q.UpdateNonzero(tlsrptdb.TLSResult{SentToRecipientDomain: true})
547 return fmt.Errorf("already marking tls results as sent for recipient domain: %v", err)
550 // Also set reporting addresses for the recipient domain results.
551 q = bstore.QueryTx[tlsrptdb.TLSResult](tx)
552 q.FilterNonzero(tlsrptdb.TLSResult{DayUTC: dayUTC, RecipientDomain: policyDomain})
553 _, err = q.UpdateNonzero(tlsrptdb.TLSResult{RecipientDomainReportingAddresses: recipientStrs})
555 return fmt.Errorf("storing recipient domain reporting addresses: %v", err)
558 q := bstore.QueryTx[tlsrptdb.TLSResult](tx)
559 q.FilterNonzero(tlsrptdb.TLSResult{DayUTC: dayUTC, PolicyDomain: policyDomain})
560 _, err := q.UpdateNonzero(tlsrptdb.TLSResult{SentToPolicyDomain: true})
562 return fmt.Errorf("already marking tls results as sent for policy domain: %v", err)
568 return false, fmt.Errorf("marking tls results as sent: %v", err)
572 for _, rcpt := range recipients {
573 // If recipient is on suppression list, we won't queue the reporting message.
574 q := bstore.QueryDB[tlsrptdb.TLSRPTSuppressAddress](ctx, db)
575 q.FilterNonzero(tlsrptdb.TLSRPTSuppressAddress{ReportingAddress: rcpt.Address.Path().String()})
576 q.FilterGreater("Until", time.Now())
577 exists, err := q.Exists()
579 return false, fmt.Errorf("querying suppress list: %v", err)
582 log.Info("suppressing outgoing tls report", mlog.Field("reportingaddress", rcpt.Address))
586 qm := queue.MakeMsg(mox.Conf.Static.Postmaster.Account, from.Path(), rcpt.Address.Path(), has8bit, smtputf8, msgSize, messageID, []byte(msgPrefix), nil)
587 // Don't try as long as regular deliveries, and stop before we would send the
588 // delayed DSN. Though we also won't send that due to IsTLSReport.
591 qm.IsTLSReport = true
596 err = queueAdd(ctx, log, &qm, msgf)
599 log.Errorx("queueing message with tls report", err)
600 metricReportError.Inc()
604 log.Debug("tls report queued", mlog.Field("recipient", rcpt))
609 // Regardless of whether we queued a report, we are not going to keep the
610 // evaluations around. Though this can be overridden if tempError is set.
616func composeMessage(ctx context.Context, log *mlog.Log, mf *os.File, policyDomain dns.Domain, confDKIM config.DKIM, fromAddr smtp.Address, recipients []message.NameAddress, subject, text, filename string, reportFile *os.File) (msgPrefix string, has8bit, smtputf8 bool, messageID string, rerr error) {
617 xc := message.NewComposer(mf, 100*1024*1024)
623 if err, ok := x.(error); ok && errors.Is(err, message.ErrCompose) {
630 // We only use smtputf8 if we have to, with a utf-8 localpart. For IDNA, we use ASCII domains.
631 for _, a := range recipients {
632 if a.Address.Localpart.IsInternational() {
638 xc.HeaderAddrs("From", []message.NameAddress{{Address: fromAddr}})
639 xc.HeaderAddrs("To", recipients)
642 xc.Header("TLS-Report-Domain", policyDomain.ASCII)
643 xc.Header("TLS-Report-Submitter", fromAddr.Domain.ASCII)
645 xc.Header("TLS-Required", "No")
646 messageID = fmt.Sprintf("<%s>", mox.MessageIDGen(xc.SMTPUTF8))
647 xc.Header("Message-Id", messageID)
648 xc.Header("Date", time.Now().Format(message.RFC5322Z))
649 xc.Header("User-Agent", "mox/"+moxvar.Version)
650 xc.Header("MIME-Version", "1.0")
652 // Multipart message, with a text/plain and the report attached.
653 mp := multipart.NewWriter(xc)
655 xc.Header("Content-Type", fmt.Sprintf(`multipart/report; report-type="tlsrpt"; boundary="%s"`, mp.Boundary()))
658 // Textual part, just mentioning this is a TLS report.
659 textBody, ct, cte := xc.TextPart(text)
660 textHdr := textproto.MIMEHeader{}
661 textHdr.Set("Content-Type", ct)
662 textHdr.Set("Content-Transfer-Encoding", cte)
663 textp, err := mp.CreatePart(textHdr)
664 xc.Checkf(err, "adding text part to message")
665 _, err = textp.Write(textBody)
666 xc.Checkf(err, "writing text part")
668 // TLS report as attachment.
669 ahdr := textproto.MIMEHeader{}
670 ct = mime.FormatMediaType("application/tlsrpt+gzip", map[string]string{"name": filename})
671 ahdr.Set("Content-Type", ct)
672 cd := mime.FormatMediaType("attachment", map[string]string{"filename": filename})
673 ahdr.Set("Content-Disposition", cd)
674 ahdr.Set("Content-Transfer-Encoding", "base64")
675 ap, err := mp.CreatePart(ahdr)
676 xc.Checkf(err, "adding tls report to message")
677 wc := moxio.Base64Writer(ap)
678 _, err = io.Copy(wc, &moxio.AtReader{R: reportFile})
679 xc.Checkf(err, "adding attachment")
681 xc.Checkf(err, "flushing attachment")
684 xc.Checkf(err, "closing multipart")
688 selectors := map[string]config.Selector{}
689 for name, sel := range confDKIM.Selectors {
691 sel.HeadersEffective = append(append([]string{}, sel.HeadersEffective...), "TLS-Report-Domain", "TLS-Report-Submitter")
692 selectors[name] = sel
694 confDKIM.Selectors = selectors
696 dkimHeader, err := dkim.Sign(ctx, fromAddr.Localpart, fromAddr.Domain, confDKIM, smtputf8, mf)
697 xc.Checkf(err, "dkim-signing report message")
699 return dkimHeader, xc.Has8bit, xc.SMTPUTF8, messageID, nil