1package dmarc
2
3import (
4 "fmt"
5 "net/url"
6 "strconv"
7 "strings"
8)
9
// parseErr is the error type for syntax errors in a DMARC record. The parser
// methods panic with a parseErr (see xerrorf), and parseRecord recovers it and
// returns it as a regular error.
type parseErr string

// Error returns the message, making parseErr implement the error interface.
func (err parseErr) Error() string {
	return string(err)
}
15
16// ParseRecord parses a DMARC TXT record.
17//
18// Fields and values are are case-insensitive in DMARC are returned in lower case
19// for easy comparison.
20//
21// DefaultRecord provides default values for tags not present in s.
22func ParseRecord(s string) (record *Record, isdmarc bool, rerr error) {
23 return parseRecord(s, true)
24}
25
26// ParseRecordNoRequired is like ParseRecord, but don't check for required fields
27// for regular DMARC records. Useful for checking the _report._dmarc record.
28func ParseRecordNoRequired(s string) (record *Record, isdmarc bool, rerr error) {
29 return parseRecord(s, false)
30}
31
32func parseRecord(s string, checkRequired bool) (record *Record, isdmarc bool, rerr error) {
33 defer func() {
34 x := recover()
35 if x == nil {
36 return
37 }
38 if err, ok := x.(parseErr); ok {
39 rerr = err
40 return
41 }
42 panic(x)
43 }()
44
45 r := DefaultRecord
46 p := newParser(s)
47
48 // v= is required and must be first. ../rfc/7489:1099
49 p.xtake("v")
50 p.wsp()
51 p.xtake("=")
52 p.wsp()
53 r.Version = p.xtakecase("DMARC1")
54 p.wsp()
55 p.xtake(";")
56 isdmarc = true
57 seen := map[string]bool{}
58 for {
59 p.wsp()
60 if p.empty() {
61 break
62 }
63 W := p.xword()
64 w := strings.ToLower(W)
65 if seen[w] {
66 // RFC does not say anything about duplicate tags. They can only confuse, so we
67 // don't allow them.
68 p.xerrorf("duplicate tag %q", W)
69 }
70 seen[w] = true
71 p.wsp()
72 p.xtake("=")
73 p.wsp()
74 switch w {
75 default:
76 // ../rfc/7489:924 implies that we should know how to parse unknown tags.
77 // The formal definition at ../rfc/7489:1127 does not allow for unknown tags.
78 // We just parse until the next semicolon or end.
79 for !p.empty() {
80 if p.peek(';') {
81 break
82 }
83 p.xtaken(1)
84 }
85 case "p":
86 if len(seen) != 1 {
87 // ../rfc/7489:1105
88 p.xerrorf("p= (policy) must be first tag")
89 }
90 r.Policy = DMARCPolicy(p.xtakelist("none", "quarantine", "reject"))
91 case "sp":
92 r.SubdomainPolicy = DMARCPolicy(p.xkeyword())
93 // note: we check if the value is valid before returning.
94 case "rua":
95 r.AggregateReportAddresses = append(r.AggregateReportAddresses, p.xuri())
96 p.wsp()
97 for p.take(",") {
98 p.wsp()
99 r.AggregateReportAddresses = append(r.AggregateReportAddresses, p.xuri())
100 p.wsp()
101 }
102 case "ruf":
103 r.FailureReportAddresses = append(r.FailureReportAddresses, p.xuri())
104 p.wsp()
105 for p.take(",") {
106 p.wsp()
107 r.FailureReportAddresses = append(r.FailureReportAddresses, p.xuri())
108 p.wsp()
109 }
110 case "adkim":
111 r.ADKIM = Align(p.xtakelist("r", "s"))
112 case "aspf":
113 r.ASPF = Align(p.xtakelist("r", "s"))
114 case "ri":
115 r.AggregateReportingInterval = p.xnumber()
116 case "fo":
117 r.FailureReportingOptions = []string{p.xtakelist("0", "1", "d", "s")}
118 p.wsp()
119 for p.take(":") {
120 p.wsp()
121 r.FailureReportingOptions = append(r.FailureReportingOptions, p.xtakelist("0", "1", "d", "s"))
122 p.wsp()
123 }
124 case "rf":
125 r.ReportingFormat = []string{p.xkeyword()}
126 p.wsp()
127 for p.take(":") {
128 p.wsp()
129 r.ReportingFormat = append(r.ReportingFormat, p.xkeyword())
130 p.wsp()
131 }
132 case "pct":
133 r.Percentage = p.xnumber()
134 if r.Percentage > 100 {
135 p.xerrorf("bad percentage %d", r.Percentage)
136 }
137 }
138 p.wsp()
139 if !p.take(";") && !p.empty() {
140 p.xerrorf("expected ;")
141 }
142 }
143
144 // ../rfc/7489:1106 says "p" is required, but ../rfc/7489:1407 implies we must be
145 // able to parse a record without a "p" or with invalid "sp" tag.
146 sp := r.SubdomainPolicy
147 if checkRequired && (!seen["p"] || sp != PolicyEmpty && sp != PolicyNone && sp != PolicyQuarantine && sp != PolicyReject) {
148 if len(r.AggregateReportAddresses) > 0 {
149 r.Policy = PolicyNone
150 r.SubdomainPolicy = PolicyEmpty
151 } else {
152 p.xerrorf("invalid (subdomain)policy and no valid aggregate reporting address")
153 }
154 }
155
156 return &r, true, nil
157}
158
// parser holds the state for parsing a single DMARC record.
type parser struct {
	s     string // Record as passed in, with original case.
	lower string // Copy of s with A-Z lower-cased, offsets are the same as in s.
	o     int    // Offset into s and lower of the next byte to parse.
}
164
// toLower returns s with the bytes A-Z lower-cased and all other bytes left
// untouched. strings.ToLower does too much: it would replace invalid bytes with
// unicode replacement characters, which would break our requirement that offsets
// into the original and lower-case strings point to the same character.
func toLower(s string) string {
	buf := []byte(s)
	for i := range buf {
		if 'A' <= buf[i] && buf[i] <= 'Z' {
			buf[i] += 'a' - 'A'
		}
	}
	return string(buf)
}
178
179func newParser(s string) *parser {
180 return &parser{
181 s: s,
182 lower: toLower(s),
183 }
184}
185
186func (p *parser) xerrorf(format string, args ...any) {
187 msg := fmt.Sprintf(format, args...)
188 if p.o < len(p.s) {
189 msg += fmt.Sprintf(" (remain %q)", p.s[p.o:])
190 }
191 panic(parseErr(msg))
192}
193
194func (p *parser) empty() bool {
195 return p.o >= len(p.s)
196}
197
198func (p *parser) peek(b byte) bool {
199 return p.o < len(p.s) && p.s[p.o] == b
200}
201
202// case insensitive prefix
203func (p *parser) prefix(s string) bool {
204 return strings.HasPrefix(p.lower[p.o:], s)
205}
206
207func (p *parser) take(s string) bool {
208 if p.prefix(s) {
209 p.o += len(s)
210 return true
211 }
212 return false
213}
214
215func (p *parser) xtaken(n int) string {
216 r := p.lower[p.o : p.o+n]
217 p.o += n
218 return r
219}
220
221func (p *parser) xtake(s string) string {
222 if !p.prefix(s) {
223 p.xerrorf("expected %q", s)
224 }
225 return p.xtaken(len(s))
226}
227
228func (p *parser) xtakecase(s string) string {
229 if !strings.HasPrefix(p.s[p.o:], s) {
230 p.xerrorf("expected %q", s)
231 }
232 r := p.s[p.o : p.o+len(s)]
233 p.o += len(s)
234 return r
235}
236
237// *WSP
238func (p *parser) wsp() {
239 for !p.empty() && (p.s[p.o] == ' ' || p.s[p.o] == '\t') {
240 p.o++
241 }
242}
243
244// take one of the strings in l.
245func (p *parser) xtakelist(l ...string) string {
246 for _, s := range l {
247 if p.prefix(s) {
248 return p.xtaken(len(s))
249 }
250 }
251 p.xerrorf("expected on one %v", l)
252 panic("not reached")
253}
254
255func (p *parser) xtakefn1case(fn func(byte, int) bool) string {
256 for i, b := range []byte(p.lower[p.o:]) {
257 if !fn(b, i) {
258 if i == 0 {
259 p.xerrorf("expected at least one char")
260 }
261 return p.xtaken(i)
262 }
263 }
264 if p.empty() {
265 p.xerrorf("expected at least 1 char")
266 }
267 r := p.s[p.o:]
268 p.o += len(r)
269 return r
270}
271
272// used for the tag keys.
273func (p *parser) xword() string {
274 return p.xtakefn1case(func(c byte, i int) bool {
275 return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9'
276 })
277}
278
279func (p *parser) xdigits() string {
280 return p.xtakefn1case(func(b byte, i int) bool {
281 return isdigit(b)
282 })
283}
284
285// ../rfc/7489:883
286// Syntax: ../rfc/7489:1132
287func (p *parser) xuri() URI {
288 // Ideally, we would simply parse an URI here. But a URI can contain a semicolon so
289 // could consume the rest of the DMARC record. Instead, we'll assume no one uses
290 // semicolons in URIs in DMARC records and first collect
291 // space/comma/semicolon/end-separated characters, then parse.
292 // ../rfc/3986:684
293 v := p.xtakefn1case(func(b byte, i int) bool {
294 return b != ',' && b != ' ' && b != '\t' && b != ';'
295 })
296 t := strings.SplitN(v, "!", 2)
297 u, err := url.Parse(t[0])
298 if err != nil {
299 p.xerrorf("parsing uri %q: %s", t[0], err)
300 }
301 if u.Scheme == "" {
302 p.xerrorf("missing scheme in uri")
303 }
304 uri := URI{
305 Address: t[0],
306 }
307 if len(t) == 2 {
308 o := t[1]
309 if o != "" {
310 c := o[len(o)-1]
311 switch c {
312 case 'k', 'K', 'm', 'M', 'g', 'G', 't', 'T':
313 uri.Unit = strings.ToLower(o[len(o)-1:])
314 o = o[:len(o)-1]
315 }
316 }
317 uri.MaxSize, err = strconv.ParseUint(o, 10, 64)
318 if err != nil {
319 p.xerrorf("parsing max size for uri: %s", err)
320 }
321 }
322 return uri
323}
324
325func (p *parser) xnumber() int {
326 digits := p.xdigits()
327 v, err := strconv.Atoi(digits)
328 if err != nil {
329 p.xerrorf("parsing %q: %s", digits, err)
330 }
331 return v
332}
333
334func (p *parser) xkeyword() string {
335 // ../rfc/7489:1195, keyword is imported from smtp.
336 // ../rfc/5321:2287
337 n := len(p.s) - p.o
338 return p.xtakefn1case(func(b byte, i int) bool {
339 return isalphadigit(b) || (b == '-' && i < n-1 && isalphadigit(p.s[p.o+i+1]))
340 })
341}
342
// isdigit returns whether b is an ASCII digit.
func isdigit(b byte) bool {
	return '0' <= b && b <= '9'
}
346
// isalpha returns whether b is an ASCII letter, in either case.
func isalpha(b byte) bool {
	// Setting bit 0x20 maps A-Z onto a-z and leaves a-z as is. No other byte ends
	// up in the range a-z.
	lower := b | 0x20
	return 'a' <= lower && lower <= 'z'
}
350
351func isalphadigit(b byte) bool {
352 return isdigit(b) || isalpha(b)
353}
354