1// Package mtastsdb stores MTA-STS policies for later use.
2//
3// An MTA-STS policy can specify how long it may be cached. By storing a
4// policy, it does not have to be fetched again during email delivery, which
5// makes it harder for attackers to intervene.
6package mtastsdb
7
8import (
9 "context"
10 "crypto/tls"
11 "errors"
12 "fmt"
13 "log/slog"
14 "os"
15 "path/filepath"
16 "strings"
17 "time"
18
19 "github.com/prometheus/client_golang/prometheus"
20 "github.com/prometheus/client_golang/prometheus/promauto"
21
22 "github.com/mjl-/bstore"
23
24 "github.com/mjl-/mox/dns"
25 "github.com/mjl-/mox/mlog"
26 "github.com/mjl-/mox/mox-"
27 "github.com/mjl-/mox/mtasts"
28 "github.com/mjl-/mox/tlsrpt"
29)
30
var (
	// metricGet counts calls to Get. The "result" label is set by Get to one of
	// "ok", "backoff", "notfound" or "error", based on the error it returns.
	metricGet = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "mox_mtastsdb_get_total",
			Help: "Number of Get by result.",
		},
		[]string{"result"},
	)
)

// timeNow returns the current time. It is used for the expiry check in lookup and
// for the timestamps written by Upsert.
var timeNow = time.Now // Tests override this.
42
// PolicyRecord is a cached policy or absence of a policy.
//
// A record with Backoff set represents a failed policy fetch; Upsert stores such
// a record with a "none" policy and a short validity period.
type PolicyRecord struct {
	Domain     string    // Domain name, with unicode characters.
	Inserted   time.Time `bstore:"default now"`
	ValidEnd   time.Time // Set by Upsert to the time of the update plus the max age of the policy. lookup only returns records with a later ValidEnd than the current time.
	LastUpdate time.Time // Policies are refreshed on use and periodically.
	LastUse    time.Time `bstore:"index"` // Set on each successful lookup and on each Upsert.
	Backoff    bool      // Set by Upsert when no policy was passed in, i.e. fetching failed. lookup returns ErrBackoff for such records.
	RecordID   string    // As retrieved from DNS.
	mtasts.Policy        // As retrieved from the well-known HTTPS url.

	// Text that makes up the policy, as retrieved. We didn't store this in the past. If
	// empty, policy can be reconstructed from Policy field. Needed by TLSRPT.
	PolicyText string
}
58
var (
	// ErrNotFound indicates there is no valid non-expired policy in the database.
	ErrNotFound = errors.New("mtastsdb: policy not found")

	// ErrBackoff indicates an MTA-STS TXT record was fetched recently, but fetching
	// the policy failed and should not yet be retried.
	ErrBackoff = errors.New("mtastsdb: policy fetch failed recently")
)
67
// DBTypes lists the types stored in the database; it is passed to bstore.Open by Init.
var DBTypes = []any{PolicyRecord{}} // Types stored in DB.

// DB is the policy database. It is assigned by Init and reset to nil by a
// successful Close.
var DB *bstore.DB // Exported for backups.
70
71// Init opens the database and starts a goroutine that refreshes policies in
72// the database, and keeps doing so periodically.
73func Init(refresher bool) error {
74 log := mlog.New("mtastsdb", nil)
75
76 p := mox.DataDirPath("mtasts.db")
77 os.MkdirAll(filepath.Dir(p), 0770)
78 opts := bstore.Options{Timeout: 5 * time.Second, Perm: 0660, RegisterLogger: log.Logger}
79 var err error
80 DB, err = bstore.Open(mox.Shutdown, p, &opts, DBTypes...)
81 if err != nil {
82 return err
83 }
84
85 if refresher {
86 // todo: allow us to shut down cleanly?
87 go refresh()
88 }
89
90 return nil
91}
92
93// Close closes the database.
94func Close() error {
95 if err := DB.Close(); err != nil {
96 return fmt.Errorf("close db: %w", err)
97 }
98 DB = nil
99 return nil
100}
101
102// lookup looks up a policy for the domain in the database.
103//
104// Only non-expired records are returned.
105//
106// Returns ErrNotFound if record is not present.
107// Returns ErrBackoff if a recent attempt to fetch a record failed.
108func lookup(ctx context.Context, log mlog.Log, domain dns.Domain) (*PolicyRecord, error) {
109 if domain.IsZero() {
110 return nil, fmt.Errorf("empty domain")
111 }
112 now := timeNow()
113 q := bstore.QueryDB[PolicyRecord](ctx, DB)
114 q.FilterNonzero(PolicyRecord{Domain: domain.Name()})
115 q.FilterGreater("ValidEnd", now)
116 pr, err := q.Get()
117 if err == bstore.ErrAbsent {
118 return nil, ErrNotFound
119 } else if err != nil {
120 return nil, err
121 }
122
123 pr.LastUse = now
124 if err := DB.Update(ctx, &pr); err != nil {
125 log.Errorx("marking cached mta-sts policy as used in database", err)
126 }
127 if pr.Backoff {
128 return nil, ErrBackoff
129 }
130 return &pr, nil
131}
132
133// Upsert adds the policy to the database, overwriting an existing policy for the domain.
134// Policy can be nil, indicating a failure to fetch the policy.
135func Upsert(ctx context.Context, domain dns.Domain, recordID string, policy *mtasts.Policy, policyText string) error {
136 return DB.Write(ctx, func(tx *bstore.Tx) error {
137 pr := PolicyRecord{Domain: domain.Name()}
138 err := tx.Get(&pr)
139 if err != nil && err != bstore.ErrAbsent {
140 return err
141 }
142
143 now := timeNow()
144
145 var p mtasts.Policy
146 if policy != nil {
147 p = *policy
148 } else {
149 // ../rfc/8461:552
150 p.Mode = mtasts.ModeNone
151 p.MaxAgeSeconds = 5 * 60
152 }
153 backoff := policy == nil
154 validEnd := now.Add(time.Duration(p.MaxAgeSeconds) * time.Second)
155
156 if err == bstore.ErrAbsent {
157 pr = PolicyRecord{domain.Name(), now, validEnd, now, now, backoff, recordID, p, policyText}
158 return tx.Insert(&pr)
159 }
160
161 pr.ValidEnd = validEnd
162 pr.LastUpdate = now
163 pr.LastUse = now
164 pr.Backoff = backoff
165 pr.RecordID = recordID
166 pr.Policy = p
167 pr.PolicyText = policyText
168 return tx.Update(&pr)
169 })
170}
171
172// PolicyRecords returns all policies in the database, sorted descending by last
173// use, domain.
174func PolicyRecords(ctx context.Context) ([]PolicyRecord, error) {
175 return bstore.QueryDB[PolicyRecord](ctx, DB).SortDesc("LastUse", "Domain").List()
176}
177
// Get retrieves an MTA-STS policy for domain and whether it is fresh.
//
// If an error is returned, it should be considered a transient error, e.g. a
// temporary DNS lookup failure.
//
// The returned policy can be nil also when there is no error. In this case, the
// domain does not implement MTA-STS.
//
// If a policy is present in the local database, it is refreshed if needed. If no
// policy is present for the domain, an attempt is made to fetch the policy and
// store it in the local database.
//
// Some errors are logged but not otherwise returned, e.g. if a new policy is
// supposedly published but could not be retrieved.
//
// Get returns an "sts" or "no-policy-found" in reportResult in most cases (when
// not a local/internal error). It may add an "sts" result without policy contents
// ("policy-string") in case of errors while fetching the policy.
//
// Fresh is true when the policy was fetched during this call, or when the cached
// policy was confirmed to be current through the ID in the DNS record. Fresh is
// false when falling back to a cached policy after a failed check or fetch, when
// backing off after a recent fetch failure, and when an error is returned.
func Get(ctx context.Context, elog *slog.Logger, resolver dns.Resolver, domain dns.Domain) (policy *mtasts.Policy, reportResult tlsrpt.Result, fresh bool, err error) {
	log := mlog.New("mtastsdb", elog)
	// Record the outcome in the metric and log it, based on the named results as set
	// by the return statements below.
	defer func() {
		result := "ok"
		if err != nil && errors.Is(err, ErrBackoff) {
			result = "backoff"
		} else if err != nil && errors.Is(err, ErrNotFound) {
			result = "notfound"
		} else if err != nil {
			result = "error"
		}
		metricGet.WithLabelValues(result).Inc()
		log.Debugx("mtastsdb get result", err, slog.Any("domain", domain), slog.Bool("fresh", fresh))
	}()

	cachedPolicy, err := lookup(ctx, log, domain)
	if err != nil && errors.Is(err, ErrNotFound) {
		// We don't have a policy for this domain, not even a record that we tried recently
		// and should backoff. So attempt to fetch policy.
		nctx, cancel := context.WithTimeout(ctx, time.Minute)
		defer cancel()
		// Note: err is shadowed within this branch. The named result is only set through
		// the explicit return statements.
		record, p, ptext, err := mtasts.Get(nctx, log.Logger, resolver, domain)
		if err != nil {
			switch {
			case errors.Is(err, mtasts.ErrNoRecord) || errors.Is(err, mtasts.ErrMultipleRecords) || errors.Is(err, mtasts.ErrRecordSyntax) || errors.Is(err, mtasts.ErrNoPolicy) || errors.Is(err, mtasts.ErrPolicyFetch) || errors.Is(err, mtasts.ErrPolicySyntax):
				// Remote is not doing MTA-STS, continue below. ../rfc/8461:333 ../rfc/8461:574
				log.Debugx("interpreting mtasts error to mean remote is not doing mta-sts", err)

				// Only a missing DNS record is reported as "no policy found". The other errors
				// are reported as an "sts" result with failure details.
				if errors.Is(err, mtasts.ErrNoRecord) {
					reportResult = tlsrpt.MakeResult(tlsrpt.NoPolicyFound, domain)
				} else {
					fd := policyFetchFailureDetails(err)
					reportResult = tlsrpt.MakeResult(tlsrpt.STS, domain, fd)
				}

			default:
				// Interpret as temporary error, e.g. mtasts.ErrDNS, try again later.

				// Temporary DNS error could be an operational issue on our side, but we can still
				// report it.
				// Result: ../rfc/8460:594
				fd := tlsrpt.Details(tlsrpt.ResultSTSPolicyFetch, mtasts.TLSReportFailureReason(err))
				reportResult = tlsrpt.MakeResult(tlsrpt.STS, domain, fd)

				return nil, reportResult, false, fmt.Errorf("lookup up mta-sts policy: %w", err)
			}
		} else if p.Mode == mtasts.ModeNone {
			reportResult = tlsrpt.MakeResult(tlsrpt.NoPolicyFound, domain)
		} else {
			reportResult = tlsrpt.Result{Policy: tlsrptPolicy(p, ptext, domain)}
		}

		// Insert policy into database. If we could not fetch the policy itself, we back
		// off for 5 minutes. ../rfc/8461:555
		// For errors about the DNS record itself, nothing is stored, so a next call
		// looks up the record again.
		if err == nil || errors.Is(err, mtasts.ErrNoPolicy) || errors.Is(err, mtasts.ErrPolicyFetch) || errors.Is(err, mtasts.ErrPolicySyntax) {
			var recordID string
			if record != nil {
				recordID = record.ID
			}
			if err := Upsert(ctx, domain, recordID, p, ptext); err != nil {
				log.Errorx("inserting policy into cache, continuing", err)
			}
		}

		return p, reportResult, true, nil
	} else if err != nil && errors.Is(err, ErrBackoff) {
		// ../rfc/8461:552
		// We recently failed to fetch a policy, act as if MTA-STS is not implemented.
		// Result: ../rfc/8460:594
		fd := tlsrpt.Details(tlsrpt.ResultSTSPolicyFetch, "back-off-after-recent-fetch-error")
		reportResult = tlsrpt.MakeResult(tlsrpt.STS, domain, fd)
		return nil, reportResult, false, nil
	} else if err != nil {
		// We don't add the result to the report, this is an internal error.
		return nil, reportResult, false, fmt.Errorf("looking up mta-sts policy in cache: %w", err)
	}

	// Policy was found in database. Check in DNS it is still fresh.
	policy = &cachedPolicy.Policy
	nctx, cancel := context.WithTimeout(ctx, 30*time.Second)
	defer cancel()
	record, _, err := mtasts.LookupRecord(nctx, log.Logger, resolver, domain)
	if err != nil {
		if errors.Is(err, mtasts.ErrNoRecord) {
			if policy.Mode != mtasts.ModeNone {
				log.Errorx("no mtasts dns record while checking non-none policy for freshness, either domain owner removed mta-sts without phasing out policy with a none-policy for period of previous max-age, or this could be an attempt to downgrade to connection without mtasts, continuing with previous policy", err)
			}
			// else, policy will be removed by periodic refresher in the near future.
		} else {
			// Could be a temporary DNS or configuration error.
			log.Errorx("checking for freshness of cached mta-sts dns txt record for domain, continuing with previously cached policy", err)
		}

		// Continue with the cached policy, not fresh, and report the failed check.
		// Result: ../rfc/8460:594
		fd := tlsrpt.Details(tlsrpt.ResultSTSPolicyFetch, mtasts.TLSReportFailureReason(err))
		if policy.Mode != mtasts.ModeNone {
			fd.FailureReasonCode += "+fallback-to-cached-policy"
		}
		reportResult = tlsrpt.Result{
			Policy:         tlsrptPolicy(policy, cachedPolicy.PolicyText, domain),
			FailureDetails: []tlsrpt.FailureDetails{fd},
		}
		return policy, reportResult, false, nil
	} else if record.ID == cachedPolicy.RecordID && cachedPolicy.PolicyText != "" {
		// In the past, we didn't store the raw policy text in cachedPolicy.PolicyText. We
		// only stop now if we do have the policy text in the cache.
		reportResult = tlsrpt.Result{Policy: tlsrptPolicy(policy, cachedPolicy.PolicyText, domain)}
		return policy, reportResult, true, nil
	}

	// New policy should be available, or we are fetching the policy again because we
	// didn't store the raw policy text in the past.
	nctx, cancel = context.WithTimeout(ctx, 30*time.Second)
	defer cancel()
	p, ptext, err := mtasts.FetchPolicy(nctx, log.Logger, domain)
	if err != nil {
		log.Errorx("fetching updated policy for domain, continuing with previously cached policy", err)

		fd := policyFetchFailureDetails(err)
		fd.FailureReasonCode += "+fallback-to-cached-policy"
		reportResult = tlsrpt.Result{
			Policy:         tlsrptPolicy(policy, cachedPolicy.PolicyText, domain),
			FailureDetails: []tlsrpt.FailureDetails{fd},
		}
		return policy, reportResult, false, nil
	}
	// Failure to store the refreshed policy is only logged, the fresh policy is
	// still returned.
	if err := Upsert(ctx, domain, record.ID, p, ptext); err != nil {
		log.Errorx("inserting refreshed policy into cache, continuing with fresh policy", err)
	}
	reportResult = tlsrpt.Result{Policy: tlsrptPolicy(p, ptext, domain)}
	return p, reportResult, true, nil
}
328
329func policyFetchFailureDetails(err error) tlsrpt.FailureDetails {
330 var verificationErr *tls.CertificateVerificationError
331 if errors.As(err, &verificationErr) {
332 resultType, reasonCode := tlsrpt.TLSFailureDetails(verificationErr)
333 // Result: ../rfc/8460:601
334 reason := string(resultType)
335 if reasonCode != "" {
336 reason += "+" + reasonCode
337 }
338 return tlsrpt.Details(tlsrpt.ResultSTSWebPKIInvalid, reason)
339 } else if errors.Is(err, mtasts.ErrPolicySyntax) {
340 // Result: ../rfc/8460:598
341 return tlsrpt.Details(tlsrpt.ResultSTSPolicyInvalid, mtasts.TLSReportFailureReason(err))
342 }
343 // Result: ../rfc/8460:594
344 return tlsrpt.Details(tlsrpt.ResultSTSPolicyFetch, mtasts.TLSReportFailureReason(err))
345}
346
347func tlsrptPolicy(p *mtasts.Policy, policyText string, domain dns.Domain) tlsrpt.ResultPolicy {
348 if policyText == "" {
349 // We didn't always store original policy lines. Reconstruct.
350 policyText = p.String()
351 }
352 lines := strings.Split(strings.TrimSuffix(policyText, "\n"), "\n")
353 for i, line := range lines {
354 lines[i] = strings.TrimSuffix(line, "\r")
355 }
356
357 rp := tlsrpt.ResultPolicy{
358 Type: tlsrpt.STS,
359 Domain: domain.ASCII,
360 String: lines,
361 }
362 rp.MXHost = make([]string, len(p.MX))
363 for i, mx := range p.MX {
364 s := mx.Domain.ASCII
365 if mx.Wildcard {
366 s = "*." + s
367 }
368 rp.MXHost[i] = s
369 }
370 return rp
371}
372