1package message
2
3import (
4 "bytes"
5 "errors"
6 "io"
7 "log"
8 "os"
9 "path/filepath"
10 "reflect"
11 "strings"
12 "testing"
13
14 "github.com/mjl-/mox/mlog"
15 "github.com/mjl-/mox/moxvar"
16)
17
// xlog is the mlog instance, created with the name "message", that the tests in
// this file pass to Parse, EnsurePart, Walk and ParseNextPart.
var xlog = mlog.New("message")
19
// tcheck aborts the test when err is non-nil, reporting msg as context followed
// by the error. A nil err is a no-op.
func tcheck(t *testing.T, err error, msg string) {
	t.Helper()
	if err == nil {
		return
	}
	t.Fatalf("%s: %s", msg, err)
}
26
// tcompare aborts the test unless got and exp are deeply equal according to
// reflect.DeepEqual.
func tcompare(t *testing.T, got, exp any) {
	t.Helper()
	if reflect.DeepEqual(got, exp) {
		return
	}
	t.Fatalf("got %q, expected %q", got, exp)
}
33
// tfail aborts the test unless err matches expErr: either both are nil, or both
// are non-nil and errors.Is(err, expErr) holds.
func tfail(t *testing.T, err, expErr error) {
	t.Helper()
	switch {
	case err == nil && expErr == nil:
		// No error expected, none returned.
		return
	case err != nil && expErr != nil && errors.Is(err, expErr):
		// The expected error, possibly wrapped.
		return
	}
	t.Fatalf("got err %v, expected %v", err, expErr)
}
40
41func TestEmptyHeader(t *testing.T) {
42 s := "\r\nx"
43 p, err := EnsurePart(xlog, true, strings.NewReader(s), int64(len(s)))
44 tcheck(t, err, "parse empty headers")
45 buf, err := io.ReadAll(p.Reader())
46 tcheck(t, err, "read")
47 expBody := "x"
48 tcompare(t, string(buf), expBody)
49 tcompare(t, p.MediaType, "")
50 tcompare(t, p.MediaSubType, "")
51}
52
53func TestBadContentType(t *testing.T) {
54 expBody := "test"
55
56 // Pedantic is like strict.
57 moxvar.Pedantic = true
58 s := "content-type: text/html;;\r\n\r\ntest"
59 p, err := EnsurePart(xlog, false, strings.NewReader(s), int64(len(s)))
60 tfail(t, err, ErrBadContentType)
61 buf, err := io.ReadAll(p.Reader())
62 tcheck(t, err, "read")
63 tcompare(t, string(buf), expBody)
64 tcompare(t, p.MediaType, "APPLICATION")
65 tcompare(t, p.MediaSubType, "OCTET-STREAM")
66 moxvar.Pedantic = false
67
68 // Strict
69 s = "content-type: text/html;;\r\n\r\ntest"
70 p, err = EnsurePart(xlog, true, strings.NewReader(s), int64(len(s)))
71 tfail(t, err, ErrBadContentType)
72 buf, err = io.ReadAll(p.Reader())
73 tcheck(t, err, "read")
74 tcompare(t, string(buf), expBody)
75 tcompare(t, p.MediaType, "APPLICATION")
76 tcompare(t, p.MediaSubType, "OCTET-STREAM")
77
78 // Non-strict but unrecoverable content-type.
79 s = "content-type: not a content type;;\r\n\r\ntest"
80 p, err = EnsurePart(xlog, false, strings.NewReader(s), int64(len(s)))
81 tcheck(t, err, "parsing message with bad but recoverable content-type")
82 buf, err = io.ReadAll(p.Reader())
83 tcheck(t, err, "read")
84 tcompare(t, string(buf), expBody)
85 tcompare(t, p.MediaType, "APPLICATION")
86 tcompare(t, p.MediaSubType, "OCTET-STREAM")
87
88 // We try to use only the content-type, typically better than application/octet-stream.
89 s = "content-type: text/html;;\r\n\r\ntest"
90 p, err = EnsurePart(xlog, false, strings.NewReader(s), int64(len(s)))
91 tcheck(t, err, "parsing message with bad but recoverable content-type")
92 buf, err = io.ReadAll(p.Reader())
93 tcheck(t, err, "read")
94 tcompare(t, string(buf), expBody)
95 tcompare(t, p.MediaType, "TEXT")
96 tcompare(t, p.MediaSubType, "HTML")
97
98 // Not recovering multipart, we won't have a boundary.
99 s = "content-type: multipart/mixed;;\r\n\r\ntest"
100 p, err = EnsurePart(xlog, false, strings.NewReader(s), int64(len(s)))
101 tcheck(t, err, "parsing message with bad but recoverable content-type")
102 buf, err = io.ReadAll(p.Reader())
103 tcheck(t, err, "read")
104 tcompare(t, string(buf), expBody)
105 tcompare(t, p.MediaType, "APPLICATION")
106 tcompare(t, p.MediaSubType, "OCTET-STREAM")
107}
108
109func TestBareCR(t *testing.T) {
110 s := "content-type: text/html\r\n\r\nbare\rcr\r\n"
111 expBody := "bare\rcr\r\n"
112
113 // Pedantic is like strict.
114 moxvar.Pedantic = true
115 p, err := EnsurePart(xlog, false, strings.NewReader(s), int64(len(s)))
116 tfail(t, err, errBareCR)
117 _, err = io.ReadAll(p.Reader())
118 tfail(t, err, errBareCR)
119 moxvar.Pedantic = false
120
121 // Strict.
122 p, err = EnsurePart(xlog, true, strings.NewReader(s), int64(len(s)))
123 tfail(t, err, errBareCR)
124 _, err = io.ReadAll(p.Reader())
125 tcheck(t, err, "read fallback part without error")
126
127 // Non-strict allows bare cr.
128 p, err = EnsurePart(xlog, false, strings.NewReader(s), int64(len(s)))
129 tcheck(t, err, "parse")
130 buf, err := io.ReadAll(p.Reader())
131 tcheck(t, err, "read")
132 tcompare(t, string(buf), expBody)
133}
134
// basicMsg is a small message with a base64-encoded text/plain body. All lines,
// including the last, end in CRLF.
var basicMsg = strings.Join([]string{
	"From: <mjl@mox.example>",
	"Content-Type: text/plain",
	"Content-Transfer-Encoding: base64",
	"",
	"aGkK",
	"", // Terminates the last line with CRLF.
}, "\r\n")
141
142func TestBasic(t *testing.T) {
143 r := strings.NewReader(basicMsg)
144 p, err := Parse(xlog, true, r)
145 tcheck(t, err, "new reader")
146
147 buf, err := io.ReadAll(p.RawReader())
148 tcheck(t, err, "read raw")
149 expBody := "aGkK\r\n"
150 tcompare(t, string(buf), expBody)
151
152 buf, err = io.ReadAll(p.Reader())
153 tcheck(t, err, "read decoded")
154 tcompare(t, string(buf), "hi\r\n")
155
156 if p.RawLineCount != 1 {
157 t.Fatalf("basic message, got %d lines, expected 1", p.RawLineCount)
158 }
159 if size := p.EndOffset - p.BodyOffset; size != int64(len(expBody)) {
160 t.Fatalf("basic message, got size %d, expected %d", size, len(expBody))
161 }
162}
163
// basicMsg2 is a plain-text message whose body is one line followed by an empty
// line. All lines end in CRLF.
//
// From ../rfc/3501:2589
var basicMsg2 = strings.Join([]string{
	"Date: Mon, 7 Feb 1994 21:52:25 -0800 (PST)",
	"From: Fred Foobar <foobar@Blurdybloop.example>",
	"Subject: afternoon meeting",
	"To: mooch@owatagu.siam.edu.example",
	"Message-Id: <B27397-0100000@Blurdybloop.example>",
	"MIME-Version: 1.0",
	"Content-Type: TEXT/PLAIN; CHARSET=US-ASCII",
	"",
	"Hello Joe, do you think we can meet at 3:30 tomorrow?",
	"",
	"", // Terminates the trailing empty line with CRLF.
}, "\r\n")
176
177func TestBasic2(t *testing.T) {
178 r := strings.NewReader(basicMsg2)
179 p, err := Parse(xlog, true, r)
180 tcheck(t, err, "new reader")
181
182 buf, err := io.ReadAll(p.RawReader())
183 tcheck(t, err, "read raw")
184 expBody := "Hello Joe, do you think we can meet at 3:30 tomorrow?\r\n\r\n"
185 tcompare(t, string(buf), expBody)
186
187 buf, err = io.ReadAll(p.Reader())
188 tcheck(t, err, "read decoded")
189 tcompare(t, string(buf), expBody)
190
191 if p.RawLineCount != 2 {
192 t.Fatalf("basic message, got %d lines, expected 2", p.RawLineCount)
193 }
194 if size := p.EndOffset - p.BodyOffset; size != int64(len(expBody)) {
195 t.Fatalf("basic message, got size %d, expected %d", size, len(expBody))
196 }
197
198 r = strings.NewReader(basicMsg2)
199 p, err = Parse(xlog, true, r)
200 tcheck(t, err, "new reader")
201 err = p.Walk(xlog, nil)
202 tcheck(t, err, "walk")
203 if p.RawLineCount != 2 {
204 t.Fatalf("basic message, got %d lines, expected 2", p.RawLineCount)
205 }
206 if size := p.EndOffset - p.BodyOffset; size != int64(len(expBody)) {
207 t.Fatalf("basic message, got size %d, expected %d", size, len(expBody))
208 }
209}
210
// mimeMsg is a multipart/mixed message with a preamble, two parts and an
// epilogue. The first part has no headers and its body does not end with a line
// break. All lines of the message end in CRLF.
var mimeMsg = strings.Join([]string{
	"From: Nathaniel Borenstein <nsb@bellcore.com>",
	"To: Ned Freed <ned@innosoft.com>",
	"Date: Sun, 21 Mar 1993 23:56:48 -0800 (PST)",
	"Subject: Sample message",
	"MIME-Version: 1.0",
	`Content-type: multipart/mixed; boundary="simple boundary"`,
	"",
	"This is the preamble. It is to be ignored, though it",
	"is a handy place for composition agents to include an",
	"explanatory note to non-MIME conformant readers.",
	"",
	"--simple boundary",
	"",
	"This is implicitly typed plain US-ASCII text.",
	"It does NOT end with a linebreak.",
	"--simple boundary",
	"Content-type: text/plain; charset=us-ascii",
	"",
	"This is explicitly typed plain US-ASCII text.",
	"It DOES end with a linebreak.",
	"",
	"--simple boundary--",
	"",
	"This is the epilogue. It is also to be ignored.",
	"", // Terminates the last line with CRLF.
}, "\r\n")
236
237func TestMime(t *testing.T) {
238 // from ../rfc/2046:1148
239 r := strings.NewReader(mimeMsg)
240 p, err := Parse(xlog, true, r)
241 tcheck(t, err, "new reader")
242 if len(p.bound) == 0 {
243 t.Fatalf("got no bound, expected bound for mime message")
244 }
245
246 pp, err := p.ParseNextPart(xlog)
247 tcheck(t, err, "next part")
248 buf, err := io.ReadAll(pp.Reader())
249 tcheck(t, err, "read all")
250 tcompare(t, string(buf), "This is implicitly typed plain US-ASCII text.\r\nIt does NOT end with a linebreak.")
251
252 pp, err = p.ParseNextPart(xlog)
253 tcheck(t, err, "next part")
254 buf, err = io.ReadAll(pp.Reader())
255 tcheck(t, err, "read all")
256 tcompare(t, string(buf), "This is explicitly typed plain US-ASCII text.\r\nIt DOES end with a linebreak.\r\n")
257
258 _, err = p.ParseNextPart(xlog)
259 tcompare(t, err, io.EOF)
260
261 if len(p.Parts) != 2 {
262 t.Fatalf("got %d parts, expected 2", len(p.Parts))
263 }
264 if p.Parts[0].RawLineCount != 2 {
265 t.Fatalf("got %d lines for first part, expected 2", p.Parts[0].RawLineCount)
266 }
267 if p.Parts[1].RawLineCount != 2 {
268 t.Fatalf("got %d lines for second part, expected 2", p.Parts[1].RawLineCount)
269 }
270}
271
272func TestLongLine(t *testing.T) {
273 line := make([]byte, maxLineLength+1)
274 for i := range line {
275 line[i] = 'a'
276 }
277 _, err := Parse(xlog, true, bytes.NewReader(line))
278 tfail(t, err, errLineTooLong)
279}
280
281func TestBareCrLf(t *testing.T) {
282 parse := func(strict bool, s string) error {
283 p, err := Parse(xlog, strict, strings.NewReader(s))
284 if err != nil {
285 return err
286 }
287 return p.Walk(xlog, nil)
288 }
289 err := parse(false, "subject: test\ntest\r\n")
290 tfail(t, err, errBareLF)
291 err = parse(false, "\r\ntest\ntest\r\n")
292 tfail(t, err, errBareLF)
293
294 moxvar.Pedantic = true
295 err = parse(false, "subject: test\rtest\r\n")
296 tfail(t, err, errBareCR)
297 err = parse(false, "\r\ntest\rtest\r\n")
298 tfail(t, err, errBareCR)
299 moxvar.Pedantic = false
300
301 err = parse(true, "subject: test\rtest\r\n")
302 tfail(t, err, errBareCR)
303 err = parse(true, "\r\ntest\rtest\r\n")
304 tfail(t, err, errBareCR)
305
306 err = parse(false, "subject: test\rtest\r\n")
307 tcheck(t, err, "header with bare cr")
308 err = parse(false, "\r\ntest\rtest\r\n")
309 tcheck(t, err, "body with bare cr")
310}
311
312func TestMissingClosingBoundary(t *testing.T) {
313 message := strings.ReplaceAll(`Content-Type: multipart/mixed; boundary=x
314
315--x
316
317test
318`, "\n", "\r\n")
319 msg, err := Parse(xlog, false, strings.NewReader(message))
320 tcheck(t, err, "new reader")
321 err = walkmsg(&msg)
322 tfail(t, err, errMissingClosingBoundary)
323
324 msg, _ = Parse(xlog, false, strings.NewReader(message))
325 err = msg.Walk(xlog, nil)
326 tfail(t, err, errMissingClosingBoundary)
327}
328
329func TestHeaderEOF(t *testing.T) {
330 message := "header: test"
331 _, err := Parse(xlog, false, strings.NewReader(message))
332 tfail(t, err, errUnexpectedEOF)
333}
334
335func TestBodyEOF(t *testing.T) {
336 message := "header: test\r\n\r\ntest"
337 msg, err := Parse(xlog, true, strings.NewReader(message))
338 tcheck(t, err, "new reader")
339 buf, err := io.ReadAll(msg.Reader())
340 tcheck(t, err, "read body")
341 tcompare(t, string(buf), "test")
342}
343
344func TestWalk(t *testing.T) {
345 var message = strings.ReplaceAll(`Content-Type: multipart/related; boundary="----=_NextPart_afb3ad6f146b12b709deac3e387a3ad7"
346
347------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7
348Content-Type: multipart/alternative; boundary="----=_NextPart_afb3ad6f146b12b709deac3e387a3ad7_alt"
349
350------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7_alt
351Content-Type: text/plain; charset="utf-8"
352Content-Transfer-Encoding: 8bit
353
354test
355
356
357------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7_alt
358Content-Type: text/html; charset="utf-8"
359Content-Transfer-Encoding: 8bit
360
361test
362
363------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7_alt--
364------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7--
365
366`, "\n", "\r\n")
367
368 msg, err := Parse(xlog, false, strings.NewReader(message))
369 tcheck(t, err, "new reader")
370 enforceSequential = true
371 defer func() {
372 enforceSequential = false
373 }()
374 err = walkmsg(&msg)
375 tcheck(t, err, "walkmsg")
376
377 msg, _ = Parse(xlog, false, strings.NewReader(message))
378 err = msg.Walk(xlog, nil)
379 tcheck(t, err, "msg.Walk")
380}
381
382func TestNested(t *testing.T) {
383 // From ../rfc/2049:801
384 nestedMessage := strings.ReplaceAll(`MIME-Version: 1.0
385From: Nathaniel Borenstein <nsb@nsb.fv.com>
386To: Ned Freed <ned@innosoft.com>
387Date: Fri, 07 Oct 1994 16:15:05 -0700 (PDT)
388Subject: A multipart example
389Content-Type: multipart/mixed;
390 boundary=unique-boundary-1
391
392This is the preamble area of a multipart message.
393Mail readers that understand multipart format
394should ignore this preamble.
395
396If you are reading this text, you might want to
397consider changing to a mail reader that understands
398how to properly display multipart messages.
399
400--unique-boundary-1
401
402 ... Some text appears here ...
403
404[Note that the blank between the boundary and the start
405 of the text in this part means no header fields were
406 given and this is text in the US-ASCII character set.
407 It could have been done with explicit typing as in the
408 next part.]
409
410--unique-boundary-1
411Content-type: text/plain; charset=US-ASCII
412
413This could have been part of the previous part, but
414illustrates explicit versus implicit typing of body
415parts.
416
417--unique-boundary-1
418Content-Type: multipart/parallel; boundary=unique-boundary-2
419
420--unique-boundary-2
421Content-Type: audio/basic
422Content-Transfer-Encoding: base64
423
424
425--unique-boundary-2
426Content-Type: image/jpeg
427Content-Transfer-Encoding: base64
428
429
430--unique-boundary-2--
431
432--unique-boundary-1
433Content-type: text/enriched
434
435This is <bold><italic>enriched.</italic></bold>
436<smaller>as defined in RFC 1896</smaller>
437
438Isn't it
439<bigger><bigger>cool?</bigger></bigger>
440
441--unique-boundary-1
442Content-Type: message/rfc822
443
444From: (mailbox in US-ASCII)
445To: (address in US-ASCII)
446Subject: (subject in US-ASCII)
447Content-Type: Text/plain; charset=ISO-8859-1
448Content-Transfer-Encoding: Quoted-printable
449
450 ... Additional text in ISO-8859-1 goes here ...
451
452--unique-boundary-1--
453`, "\n", "\r\n")
454
455 msg, err := Parse(xlog, true, strings.NewReader(nestedMessage))
456 tcheck(t, err, "new reader")
457 enforceSequential = true
458 defer func() {
459 enforceSequential = false
460 }()
461 err = walkmsg(&msg)
462 tcheck(t, err, "walkmsg")
463
464 if len(msg.Parts) != 5 {
465 t.Fatalf("got %d parts, expected 5", len(msg.Parts))
466 }
467 sub := msg.Parts[4].Message
468 if sub == nil {
469 t.Fatalf("missing part.Message")
470 }
471 buf, err := io.ReadAll(sub.Reader())
472 if err != nil {
473 t.Fatalf("read message body: %v", err)
474 }
475 exp := " ... Additional text in ISO-8859-1 goes here ...\r\n"
476 if string(buf) != exp {
477 t.Fatalf("got %q, expected %q", buf, exp)
478 }
479
480 msg, _ = Parse(xlog, false, strings.NewReader(nestedMessage))
481 err = msg.Walk(xlog, nil)
482 tcheck(t, err, "msg.Walk")
483
484}
485
486func TestWalkdir(t *testing.T) {
487 // Ensure these dirs exist. Developers should bring their own ham/spam example
488 // emails.
489 os.MkdirAll("../testdata/train/ham", 0770)
490 os.MkdirAll("../testdata/train/spam", 0770)
491
492 var n, nfail int
493 twalkdir(t, "../testdata/train/ham", &n, &nfail)
494 twalkdir(t, "../testdata/train/spam", &n, &nfail)
495 log.Printf("parsing messages: %d/%d failed", nfail, n)
496}
497
498func twalkdir(t *testing.T, dir string, n, nfail *int) {
499 names, err := os.ReadDir(dir)
500 tcheck(t, err, "readdir")
501 if len(names) > 1000 {
502 names = names[:1000]
503 }
504 for _, name := range names {
505 p := filepath.Join(dir, name.Name())
506 *n++
507 err := walk(p)
508 if err != nil {
509 *nfail++
510 log.Printf("%s: %v", p, err)
511 }
512 }
513}
514
515func walk(path string) error {
516 r, err := os.Open(path)
517 if err != nil {
518 return err
519 }
520 defer r.Close()
521 msg, err := Parse(xlog, false, r)
522 if err != nil {
523 return err
524 }
525 return walkmsg(&msg)
526}
527
528func walkmsg(msg *Part) error {
529 enforceSequential = true
530 defer func() {
531 enforceSequential = false
532 }()
533
534 if len(msg.bound) == 0 {
535 buf, err := io.ReadAll(msg.Reader())
536 if err != nil {
537 return err
538 }
539
540 if msg.MediaType == "MESSAGE" && (msg.MediaSubType == "RFC822" || msg.MediaSubType == "GLOBAL") {
541 mp, err := Parse(xlog, false, bytes.NewReader(buf))
542 if err != nil {
543 return err
544 }
545 msg.Message = &mp
546 walkmsg(msg.Message)
547 }
548
549 size := msg.EndOffset - msg.BodyOffset
550 if size < 0 {
551 log.Printf("msg %v", msg)
552 panic("inconsistent body/end offset")
553 }
554 sr := io.NewSectionReader(msg.r, msg.BodyOffset, size)
555 decsr := msg.bodyReader(sr)
556 buf2, err := io.ReadAll(decsr)
557 if err != nil {
558 return err
559 }
560
561 if !bytes.Equal(buf, buf2) {
562 panic("data mismatch reading sequentially vs via offsets")
563 }
564
565 return nil
566 }
567
568 for {
569 pp, err := msg.ParseNextPart(xlog)
570 if err == io.EOF {
571 return nil
572 }
573 if err != nil {
574 return err
575 }
576 if err := walkmsg(pp); err != nil {
577 return err
578 }
579 enforceSequential = true
580 }
581}
582
583func TestEmbedded(t *testing.T) {
584 f, err := os.Open("../testdata/message/message-rfc822-multipart.eml")
585 tcheck(t, err, "open")
586 fi, err := f.Stat()
587 tcheck(t, err, "stat")
588 _, err = EnsurePart(xlog, false, f, fi.Size())
589 tcheck(t, err, "parse")
590}
591
592func TestEmbedded2(t *testing.T) {
593 buf, err := os.ReadFile("../testdata/message/message-rfc822-multipart2.eml")
594 tcheck(t, err, "readfile")
595 buf = bytes.ReplaceAll(buf, []byte("\n"), []byte("\r\n"))
596
597 _, err = EnsurePart(xlog, false, bytes.NewReader(buf), int64(len(buf)))
598 tfail(t, err, nil)
599}
600