1package dkim
2
3import (
4 "encoding/base64"
5 "fmt"
6 "strconv"
7 "strings"
8
9 "github.com/mjl-/mox/dns"
10 "github.com/mjl-/mox/moxvar"
11 "github.com/mjl-/mox/smtp"
12)
13
// parseErr is the error value the parser panics with (see xerrorf) when input
// cannot be parsed. The string is the complete error message.
type parseErr string

// Error implements the error interface by returning the message unchanged.
func (pe parseErr) Error() string {
	msg := string(pe)
	return msg
}

// Compile-time check that parseErr satisfies error.
var _ error = parseErr("")
21
// parser is a cursor over a string, with helpers for consuming the pieces of a
// DKIM tag list. Methods whose name starts with "x" panic with a parseErr on
// malformed input (through xerrorf).
type parser struct {
	// s is the complete input.
	s string

	// o is the offset into s of the next unconsumed byte.
	o int

	// tracked holds all data consumed, except when "drop" is true. To be set
	// by caller when parsing the value for "b=".
	tracked string

	// drop, while true, keeps consumed data from being appended to tracked.
	drop bool

	// smtputf8, if set, allows characters > 0x7f.
	smtputf8 bool
}
29
30func (p *parser) xerrorf(format string, args ...any) {
31 msg := fmt.Sprintf(format, args...)
32 if p.o < len(p.s) {
33 msg = fmt.Sprintf("%s (leftover %q)", msg, p.s[p.o:])
34 }
35 panic(parseErr(msg))
36}
37
38func (p *parser) track(s string) {
39 if !p.drop {
40 p.tracked += s
41 }
42}
43
// hasPrefix reports whether the unconsumed input (p.s from offset p.o) starts
// with s. Nothing is consumed.
func (p *parser) hasPrefix(s string) bool {
	return strings.HasPrefix(p.s[p.o:], s)
}
47
48func (p *parser) xtaken(n int) string {
49 r := p.s[p.o : p.o+n]
50 p.o += n
51 p.track(r)
52 return r
53}
54
55func (p *parser) xtakefn(ignoreFWS bool, fn func(c rune, i int) bool) string {
56 var r string
57 for i, c := range p.s[p.o:] {
58 if !fn(c, i) {
59 switch c {
60 case ' ', '\t', '\r', '\n':
61 continue
62 }
63 p.xtaken(i)
64 return r
65 }
66 r += string(c)
67 }
68 p.xtaken(len(p.s) - p.o)
69 return r
70}
71
// empty reports whether all input has been consumed.
func (p *parser) empty() bool {
	return p.o >= len(p.s)
}
75
76func (p *parser) xnonempty() {
77 if p.o >= len(p.s) {
78 p.xerrorf("expected at least 1 more char")
79 }
80}
81
82func (p *parser) xtakefn1(ignoreFWS bool, fn func(c rune, i int) bool) string {
83 var r string
84 p.xnonempty()
85 for i, c := range p.s[p.o:] {
86 if !fn(c, i) {
87 switch c {
88 case ' ', '\t', '\r', '\n':
89 continue
90 }
91 if i == 0 {
92 p.xerrorf("expected at least 1 char")
93 }
94 p.xtaken(i)
95 return r
96 }
97 r += string(c)
98 }
99 return p.xtaken(len(p.s) - p.o)
100}
101
102func (p *parser) wsp() {
103 p.xtakefn(false, func(c rune, i int) bool {
104 return c == ' ' || c == '\t'
105 })
106}
107
108func (p *parser) fws() {
109 p.wsp()
110 if p.hasPrefix("\r\n ") || p.hasPrefix("\r\n\t") {
111 p.xtaken(3)
112 p.wsp()
113 }
114}
115
116// peekfws returns whether remaining text starts with s, optionally prefix with fws.
117func (p *parser) peekfws(s string) bool {
118 o := p.o
119 p.fws()
120 r := p.hasPrefix(s)
121 p.o = o
122 return r
123}
124
125func (p *parser) xtake(s string) string {
126 if !strings.HasPrefix(p.s[p.o:], s) {
127 p.xerrorf("expected %q", s)
128 }
129 return p.xtaken(len(s))
130}
131
132func (p *parser) take(s string) bool {
133 if strings.HasPrefix(p.s[p.o:], s) {
134 p.o += len(s)
135 p.track(s)
136 return true
137 }
138 return false
139}
140
141// ../rfc/6376:657
142func (p *parser) xtagName() string {
143 return p.xtakefn1(false, func(c rune, i int) bool {
144 return isalpha(c) || i > 0 && (isdigit(c) || c == '_')
145 })
146}
147
148func (p *parser) xalgorithm() (string, string) {
149 // ../rfc/6376:1046
150 xtagx := func(c rune, i int) bool {
151 return isalpha(c) || i > 0 && isdigit(c)
152 }
153 algk := p.xtakefn1(false, xtagx)
154 p.xtake("-")
155 algv := p.xtakefn1(false, xtagx)
156 return algk, algv
157}
158
159// fws in value is ignored. empty/no base64 characters is valid.
160// ../rfc/6376:1021
161// ../rfc/6376:1076
162func (p *parser) xbase64() []byte {
163 s := ""
164 p.xtakefn(false, func(c rune, i int) bool {
165 if isalphadigit(c) || c == '+' || c == '/' || c == '=' {
166 s += string(c)
167 return true
168 }
169 if c == ' ' || c == '\t' {
170 return true
171 }
172 rem := p.s[p.o+i:]
173 if strings.HasPrefix(rem, "\r\n ") || strings.HasPrefix(rem, "\r\n\t") {
174 return true
175 }
176 if (strings.HasPrefix(rem, "\n ") || strings.HasPrefix(rem, "\n\t")) && p.o+i-1 > 0 && p.s[p.o+i-1] == '\r' {
177 return true
178 }
179 return false
180 })
181 buf, err := base64.StdEncoding.DecodeString(s)
182 if err != nil {
183 p.xerrorf("decoding base64: %v", err)
184 }
185 return buf
186}
187
188// parses canonicalization in original case.
189func (p *parser) xcanonical() string {
190 // ../rfc/6376:1100
191 s := p.xhyphenatedWord()
192 if p.take("/") {
193 return s + "/" + p.xhyphenatedWord()
194 }
195 return s
196}
197
198func (p *parser) xdomainselector(isselector bool) dns.Domain {
199 subdomain := func(c rune, i int) bool {
200 // domain names must always be a-labels, ../rfc/6376:1115 ../rfc/6376:1187 ../rfc/6376:1303
201 // dkim selectors with underscores happen in the wild, accept them when not in
202 // pedantic mode. ../rfc/6376:581 ../rfc/5321:2303
203 return isalphadigit(c) || (i > 0 && (c == '-' || isselector && !moxvar.Pedantic && c == '_') && p.o+1 < len(p.s))
204 }
205 s := p.xtakefn1(false, subdomain)
206 for p.hasPrefix(".") {
207 s += p.xtake(".") + p.xtakefn1(false, subdomain)
208 }
209 if isselector {
210 // Not to be interpreted as IDNA.
211 return dns.Domain{ASCII: strings.ToLower(s)}
212 }
213 d, err := dns.ParseDomain(s)
214 if err != nil {
215 p.xerrorf("parsing domain %q: %s", s, err)
216 }
217 return d
218}
219
// xdomain parses a domain name: xdomainselector with isselector false, so the
// text is run through dns.ParseDomain.
func (p *parser) xdomain() dns.Domain {
	return p.xdomainselector(false)
}
223
// xselector parses a DKIM selector: xdomainselector with isselector true, so
// the text is only lower-cased and not interpreted as IDNA.
func (p *parser) xselector() dns.Domain {
	return p.xdomainselector(true)
}
227
228func (p *parser) xhdrName(ignoreFWS bool) string {
229 // ../rfc/6376:473
230 // ../rfc/5322:1689
231 // BNF for hdr-name (field-name) allows ";", but DKIM disallows unencoded semicolons. ../rfc/6376:643
232 // ignoreFWS is needed for "z=", which can have FWS anywhere. ../rfc/6376:1372
233 return p.xtakefn1(ignoreFWS, func(c rune, i int) bool {
234 return c > ' ' && c < 0x7f && c != ':' && c != ';'
235 })
236}
237
238func (p *parser) xsignedHeaderFields() []string {
239 // ../rfc/6376:1157
240 l := []string{p.xhdrName(false)}
241 for p.peekfws(":") {
242 p.fws()
243 p.xtake(":")
244 p.fws()
245 l = append(l, p.xhdrName(false))
246 }
247 return l
248}
249
250func (p *parser) xauid() Identity {
251 // ../rfc/6376:1192
252 // Localpart is optional.
253 if p.take("@") {
254 return Identity{Domain: p.xdomain()}
255 }
256 lp := p.xlocalpart()
257 p.xtake("@")
258 dom := p.xdomain()
259 return Identity{&lp, dom}
260}
261
262// todo: reduce duplication between implementations: ../smtp/address.go:/xlocalpart ../dkim/parser.go:/xlocalpart ../smtpserver/parse.go:/xlocalpart
263func (p *parser) xlocalpart() smtp.Localpart {
264 // ../rfc/6376:434
265 // ../rfc/5321:2316
266 var s string
267 if p.hasPrefix(`"`) {
268 s = p.xquotedString()
269 } else {
270 s = p.xatom()
271 for p.take(".") {
272 s += "." + p.xatom()
273 }
274 }
275 // In the wild, some services use large localparts for generated (bounce) addresses.
276 if moxvar.Pedantic && len(s) > 64 || len(s) > 128 {
277 // ../rfc/5321:3486
278 p.xerrorf("localpart longer than 64 octets")
279 }
280 return smtp.Localpart(s)
281}
282
283func (p *parser) xquotedString() string {
284 p.xtake(`"`)
285 var s string
286 var esc bool
287 for {
288 c := p.xchar()
289 if esc {
290 if c >= ' ' && c < 0x7f {
291 s += string(c)
292 esc = false
293 continue
294 }
295 p.xerrorf("invalid localpart, bad escaped char %c", c)
296 }
297 if c == '\\' {
298 esc = true
299 continue
300 }
301 if c == '"' {
302 return s
303 }
304 if c >= ' ' && c < 0x7f && c != '\\' && c != '"' || (c > 0x7f && p.smtputf8) {
305 s += string(c)
306 continue
307 }
308 p.xerrorf("invalid localpart, invalid character %c", c)
309 }
310}
311
312func (p *parser) xchar() rune {
313 // We are careful to track invalid utf-8 properly.
314 if p.empty() {
315 p.xerrorf("need another character")
316 }
317 var r rune
318 var o int
319 for i, c := range p.s[p.o:] {
320 if i > 0 {
321 o = i
322 break
323 }
324 r = c
325 }
326 if o == 0 {
327 p.track(p.s[p.o:])
328 p.o = len(p.s)
329 } else {
330 p.track(p.s[p.o : p.o+o])
331 p.o += o
332 }
333 return r
334}
335
336func (p *parser) xatom() string {
337 return p.xtakefn1(false, func(c rune, i int) bool {
338 switch c {
339 case '!', '#', '$', '%', '&', '\'', '*', '+', '-', '/', '=', '?', '^', '_', '`', '{', '|', '}', '~':
340 return true
341 }
342 return isalphadigit(c) || (c > 0x7f && p.smtputf8)
343 })
344}
345
// xbodyLength parses a body length: a decimal number of at most 76 digits, as
// parsed by xnumber (which additionally requires the value to fit an int64).
func (p *parser) xbodyLength() int64 {
	// ../rfc/6376:1265
	return p.xnumber(76)
}
350
351func (p *parser) xnumber(maxdigits int) int64 {
352 o := -1
353 for i, c := range p.s[p.o:] {
354 if c >= '0' && c <= '9' {
355 o = i
356 } else {
357 break
358 }
359 }
360 if o == -1 {
361 p.xerrorf("expected digits")
362 }
363 if o+1 > maxdigits {
364 p.xerrorf("too many digits")
365 }
366 v, err := strconv.ParseInt(p.xtaken(o+1), 10, 64)
367 if err != nil {
368 p.xerrorf("parsing digits: %s", err)
369 }
370 return v
371}
372
373func (p *parser) xqueryMethods() []string {
374 // ../rfc/6376:1285
375 l := []string{p.xqtagmethod()}
376 for p.peekfws(":") {
377 p.fws()
378 p.xtake(":")
379 l = append(l, p.xqtagmethod())
380 }
381 return l
382}
383
384func (p *parser) xqtagmethod() string {
385 // ../rfc/6376:1295 ../rfc/6376-eid4810
386 s := p.xhyphenatedWord()
387 // ABNF production "x-sig-q-tag-args" should probably just have been
388 // "hyphenated-word". As qp-hdr-value, it will consume ":". A similar problem does
389 // not occur for "z" because it is also "|"-delimited. We work around the potential
390 // issue by parsing "dns/txt" explicitly.
391 rem := p.s[p.o:]
392 if strings.EqualFold(s, "dns") && len(rem) >= len("/txt") && strings.EqualFold(rem[:len("/txt")], "/txt") {
393 s += p.xtaken(4)
394 } else if p.take("/") {
395 s += "/" + p.xqp(true, true, false)
396 }
397 return s
398}
399
// isalpha reports whether c is an ASCII letter, a-z or A-Z.
func isalpha(c rune) bool {
	switch {
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
403
// isdigit reports whether c is an ASCII decimal digit, 0-9.
func isdigit(c rune) bool {
	if c < '0' || c > '9' {
		return false
	}
	return true
}
407
408func isalphadigit(c rune) bool {
409 return isalpha(c) || isdigit(c)
410}
411
412// ../rfc/6376:469
413func (p *parser) xhyphenatedWord() string {
414 return p.xtakefn1(false, func(c rune, i int) bool {
415 return isalpha(c) || i > 0 && isdigit(c) || i > 0 && c == '-' && p.o+i+1 < len(p.s) && isalphadigit(rune(p.s[p.o+i+1]))
416 })
417}
418
// xqphdrvalue parses a quoted-printable header value: xqp with pipeEncoded
// true and colonEncoded false, so parsing stops at an unencoded "|" but not at
// ":". ignoreFWS is passed on to xqp.
// ../rfc/6376:474
func (p *parser) xqphdrvalue(ignoreFWS bool) string {
	return p.xqp(true, false, ignoreFWS)
}
423
// xqpSection parses a qp-section: xqp with pipeEncoded, colonEncoded and
// ignoreFWS all false, so "|" and ":" are ordinary characters.
func (p *parser) xqpSection() string {
	return p.xqp(false, false, false)
}
427
428// dkim-quoted-printable (pipeEncoded true) or qp-section.
429//
430// It is described in terms of (lots of) modifications to MIME quoted-printable,
431// but it may be simpler to just ignore that reference.
432//
433// ignoreFWS is required for "z=", which can have FWS anywhere.
434func (p *parser) xqp(pipeEncoded, colonEncoded, ignoreFWS bool) string {
435 // ../rfc/6376:494 ../rfc/2045:1260
436
437 hex := func(c byte) rune {
438 if c >= '0' && c <= '9' {
439 return rune(c - '0')
440 }
441 return rune(10 + c - 'A')
442 }
443
444 s := ""
445 for !p.empty() {
446 p.fws()
447 if pipeEncoded && p.hasPrefix("|") {
448 break
449 }
450 if colonEncoded && p.hasPrefix(":") {
451 break
452 }
453 if p.take("=") {
454 h := p.xtakefn(ignoreFWS, func(c rune, i int) bool {
455 return i < 2 && (c >= '0' && c <= '9' || c >= 'A' && c <= 'Z')
456 })
457 if len(h) != 2 {
458 p.xerrorf("expected qp-hdr-value")
459 }
460 c := (hex(h[0]) << 4) | hex(h[1])
461 s += string(c)
462 continue
463 }
464 x := p.xtakefn(ignoreFWS, func(c rune, i int) bool {
465 return c > ' ' && c < 0x7f && c != ';' && c != '=' && !(pipeEncoded && c == '|')
466 })
467 if x == "" {
468 break
469 }
470 s += x
471 }
472 return s
473}
474
// xtimestamp parses a timestamp: a decimal number of at most 12 digits, as
// parsed by xnumber.
func (p *parser) xtimestamp() int64 {
	// ../rfc/6376:1325 ../rfc/6376:1358
	return p.xnumber(12)
}
479
480func (p *parser) xcopiedHeaderFields() []string {
481 // ../rfc/6376:1384
482 l := []string{p.xztagcopy()}
483 for p.hasPrefix("|") {
484 p.xtake("|")
485 p.fws()
486 l = append(l, p.xztagcopy())
487 }
488 return l
489}
490
491func (p *parser) xztagcopy() string {
492 // ABNF does not mention FWS (unlike for other fields), but FWS is allowed everywhere in the value...
493 // ../rfc/6376:1386 ../rfc/6376:1372
494 f := p.xhdrName(true)
495 p.fws()
496 p.xtake(":")
497 v := p.xqphdrvalue(true)
498 return f + ":" + v
499}
500