14 "github.com/mjl-/mox/mlog"
15 "github.com/mjl-/mox/moxvar"
// xlog is the logger passed to Parse, EnsurePart, ParseNextPart and Walk
// throughout these tests.
18var xlog = mlog.New("message")
// tcheck is a test helper that takes an error and a context message and
// reports both through t.Fatalf in the form "msg: err".
// NOTE(review): the guard around the Fatalf call is not visible in this
// excerpt; presumably it only fires when err is non-nil. Confirm.
20func tcheck(t *testing.T, err error, msg string) {
23 t.Fatalf("%s: %s", msg, err)
// tcompare is a test helper that fails the test with t.Fatalf when got and exp
// are not equal according to reflect.DeepEqual. Both values are printed with
// the %q verb.
27func tcompare(t *testing.T, got, exp any) {
29 if !reflect.DeepEqual(got, exp) {
30 t.Fatalf("got %q, expected %q", got, exp)
// tfail is a test helper that checks err against the expected error expErr.
// It fails the test when exactly one of the two is nil, or when expErr is
// non-nil and errors.Is(err, expErr) does not hold.
34func tfail(t *testing.T, err, expErr error) {
// First clause: nil-ness must agree. Second clause: a non-nil expectation
// must be matched (possibly through wrapping) by err.
36 if (err == nil) != (expErr == nil) || expErr != nil && !errors.Is(err, expErr) {
37 t.Fatalf("got err %v, expected %v", err, expErr)
// TestEmptyHeader parses input s with EnsurePart (second argument true) and
// checks that parsing succeeds, that the body read back equals expBody, and
// that both MediaType and MediaSubType are left empty.
// NOTE(review): the definitions of s and expBody are not visible in this
// excerpt.
41func TestEmptyHeader(t *testing.T) {
43 p, err := EnsurePart(xlog, true, strings.NewReader(s), int64(len(s)))
44 tcheck(t, err, "parse empty headers")
45 buf, err := io.ReadAll(p.Reader())
46 tcheck(t, err, "read")
48 tcompare(t, string(buf), expBody)
49 tcompare(t, p.MediaType, "")
50 tcompare(t, p.MediaSubType, "")
// TestBadContentType feeds EnsurePart messages whose content-type header is
// malformed (trailing ";;") and checks, per mode, both the returned error and
// the media type recorded on the returned part. In every case the body must
// still be readable and equal expBody.
// NOTE(review): the definition of expBody is not visible in this excerpt.
// NOTE(review): moxvar.Pedantic is set to true and reset with a plain
// assignment further down. The helpers call t.Fatalf, so a failure in between
// would skip the reset and leave Pedantic enabled for later tests; a defer or
// t.Cleanup would avoid that.
53func TestBadContentType(t *testing.T) {
56 // Pedantic is like strict.
57 moxvar.Pedantic = true
58 s := "content-type: text/html;;\r\n\r\ntest"
59 p, err := EnsurePart(xlog, false, strings.NewReader(s), int64(len(s)))
60 tfail(t, err, ErrBadContentType)
61 buf, err := io.ReadAll(p.Reader())
62 tcheck(t, err, "read")
63 tcompare(t, string(buf), expBody)
// Despite the error, a part is returned, typed application/octet-stream.
64 tcompare(t, p.MediaType, "APPLICATION")
65 tcompare(t, p.MediaSubType, "OCTET-STREAM")
66 moxvar.Pedantic = false
// Same input with the second argument of EnsurePart set to true: same error
// and same application/octet-stream result expected.
69 s = "content-type: text/html;;\r\n\r\ntest"
70 p, err = EnsurePart(xlog, true, strings.NewReader(s), int64(len(s)))
71 tfail(t, err, ErrBadContentType)
72 buf, err = io.ReadAll(p.Reader())
73 tcheck(t, err, "read")
74 tcompare(t, string(buf), expBody)
75 tcompare(t, p.MediaType, "APPLICATION")
76 tcompare(t, p.MediaSubType, "OCTET-STREAM")
78 // Non-strict but unrecoverable content-type.
// NOTE(review): the tcheck message below says "recoverable" although this case
// is the unrecoverable one; the message looks copied from the cases that follow.
79 s = "content-type: not a content type;;\r\n\r\ntest"
80 p, err = EnsurePart(xlog, false, strings.NewReader(s), int64(len(s)))
81 tcheck(t, err, "parsing message with bad but recoverable content-type")
82 buf, err = io.ReadAll(p.Reader())
83 tcheck(t, err, "read")
84 tcompare(t, string(buf), expBody)
85 tcompare(t, p.MediaType, "APPLICATION")
86 tcompare(t, p.MediaSubType, "OCTET-STREAM")
88 // We try to use only the content-type, typically better than application/octet-stream.
89 s = "content-type: text/html;;\r\n\r\ntest"
90 p, err = EnsurePart(xlog, false, strings.NewReader(s), int64(len(s)))
91 tcheck(t, err, "parsing message with bad but recoverable content-type")
92 buf, err = io.ReadAll(p.Reader())
93 tcheck(t, err, "read")
94 tcompare(t, string(buf), expBody)
95 tcompare(t, p.MediaType, "TEXT")
96 tcompare(t, p.MediaSubType, "HTML")
98 // Not recovering multipart, we won't have a boundary.
99 s = "content-type: multipart/mixed;;\r\n\r\ntest"
100 p, err = EnsurePart(xlog, false, strings.NewReader(s), int64(len(s)))
101 tcheck(t, err, "parsing message with bad but recoverable content-type")
102 buf, err = io.ReadAll(p.Reader())
103 tcheck(t, err, "read")
104 tcompare(t, string(buf), expBody)
105 tcompare(t, p.MediaType, "APPLICATION")
106 tcompare(t, p.MediaSubType, "OCTET-STREAM")
// TestBareCR checks how EnsurePart and the part's Reader treat a body that
// contains a carriage return not followed by a line feed, in three modes:
// pedantic, strict, and non-strict.
// NOTE(review): moxvar.Pedantic is reset with a plain assignment; a t.Fatalf
// from a helper before the reset would leave it enabled for later tests.
109func TestBareCR(t *testing.T) {
110 s := "content-type: text/html\r\n\r\nbare\rcr\r\n"
111 expBody := "bare\rcr\r\n"
113 // Pedantic is like strict.
114 moxvar.Pedantic = true
115 p, err := EnsurePart(xlog, false, strings.NewReader(s), int64(len(s)))
116 tfail(t, err, errBareCR)
// With Pedantic set, reading the returned part also reports errBareCR.
117 _, err = io.ReadAll(p.Reader())
118 tfail(t, err, errBareCR)
119 moxvar.Pedantic = false
// Strict (second argument true) without Pedantic: EnsurePart reports
// errBareCR, but the returned part must be readable without error.
122 p, err = EnsurePart(xlog, true, strings.NewReader(s), int64(len(s)))
123 tfail(t, err, errBareCR)
124 _, err = io.ReadAll(p.Reader())
125 tcheck(t, err, "read fallback part without error")
127 // Non-strict allows bare cr.
128 p, err = EnsurePart(xlog, false, strings.NewReader(s), int64(len(s)))
129 tcheck(t, err, "parse")
130 buf, err := io.ReadAll(p.Reader())
131 tcheck(t, err, "read")
132 tcompare(t, string(buf), expBody)
// basicMsg is the input for TestBasic: a message with a From header, a
// text/plain content-type and a base64 content-transfer-encoding. It is built
// by applying strings.ReplaceAll to a raw string literal.
// NOTE(review): the replacement arguments are not visible in this excerpt;
// presumably they turn "\n" into "\r\n". Confirm.
135var basicMsg = strings.ReplaceAll(`From: <mjl@mox.example>
136Content-Type: text/plain
137Content-Transfer-Encoding: base64
// TestBasic parses basicMsg with Parse (strict set to true) and verifies the
// raw body, the decoded body, the raw line count and the body size derived
// from the part's offsets.
142func TestBasic(t *testing.T) {
143 r := strings.NewReader(basicMsg)
144 p, err := Parse(xlog, true, r)
145 tcheck(t, err, "new reader")
// RawReader is expected to yield the body as it appears in the message, still
// base64-encoded.
147 buf, err := io.ReadAll(p.RawReader())
148 tcheck(t, err, "read raw")
149 expBody := "aGkK\r\n"
150 tcompare(t, string(buf), expBody)
// Reader is expected to yield the decoded body.
152 buf, err = io.ReadAll(p.Reader())
153 tcheck(t, err, "read decoded")
154 tcompare(t, string(buf), "hi\r\n")
156 if p.RawLineCount != 1 {
157 t.Fatalf("basic message, got %d lines, expected 1", p.RawLineCount)
// The distance between BodyOffset and EndOffset must equal the raw body length.
159 if size := p.EndOffset - p.BodyOffset; size != int64(len(expBody)) {
160 t.Fatalf("basic message, got size %d, expected %d", size, len(expBody))
// basicMsg2 is the input for TestBasic2: a message with Date, From, Subject,
// To, Message-Id and Content-Type (TEXT/PLAIN; CHARSET=US-ASCII) headers and a
// short plain-text body. It is built by applying strings.ReplaceAll to a raw
// string literal.
// NOTE(review): the replacement arguments are not visible in this excerpt;
// presumably they turn "\n" into "\r\n". Confirm.
165var basicMsg2 = strings.ReplaceAll(`Date: Mon, 7 Feb 1994 21:52:25 -0800 (PST)
166From: Fred Foobar <foobar@Blurdybloop.example>
167Subject: afternoon meeting
168To: mooch@owatagu.siam.edu.example
169Message-Id: <B27397-0100000@Blurdybloop.example>
171Content-Type: TEXT/PLAIN; CHARSET=US-ASCII
173Hello Joe, do you think we can meet at 3:30 tomorrow?
// TestBasic2 parses basicMsg2 twice with Parse (strict set to true). The first
// pass reads the body through RawReader and Reader and checks the content,
// the raw line count and the offset-derived size. The second pass calls Walk
// instead of reading and checks that line count and size come out the same.
177func TestBasic2(t *testing.T) {
178 r := strings.NewReader(basicMsg2)
179 p, err := Parse(xlog, true, r)
180 tcheck(t, err, "new reader")
182 buf, err := io.ReadAll(p.RawReader())
183 tcheck(t, err, "read raw")
184 expBody := "Hello Joe, do you think we can meet at 3:30 tomorrow?\r\n\r\n"
185 tcompare(t, string(buf), expBody)
// The raw and decoded bodies are expected to be identical for this message.
187 buf, err = io.ReadAll(p.Reader())
188 tcheck(t, err, "read decoded")
189 tcompare(t, string(buf), expBody)
191 if p.RawLineCount != 2 {
192 t.Fatalf("basic message, got %d lines, expected 2", p.RawLineCount)
194 if size := p.EndOffset - p.BodyOffset; size != int64(len(expBody)) {
195 t.Fatalf("basic message, got size %d, expected %d", size, len(expBody))
// Second pass: fresh reader and part, processed with Walk rather than by
// reading the body explicitly.
198 r = strings.NewReader(basicMsg2)
199 p, err = Parse(xlog, true, r)
200 tcheck(t, err, "new reader")
201 err = p.Walk(xlog, nil)
202 tcheck(t, err, "walk")
203 if p.RawLineCount != 2 {
204 t.Fatalf("basic message, got %d lines, expected 2", p.RawLineCount)
206 if size := p.EndOffset - p.BodyOffset; size != int64(len(expBody)) {
207 t.Fatalf("basic message, got size %d, expected %d", size, len(expBody))
// mimeMsg is the input for TestMime: a multipart/mixed message declaring the
// boundary "simple boundary". Its text contains a preamble, a part described
// as implicitly typed, a part with an explicit text/plain content-type, and an
// epilogue. It is built by applying strings.ReplaceAll to a raw string literal.
// NOTE(review): the boundary delimiter lines and the replacement arguments
// are not visible in this excerpt.
211var mimeMsg = strings.ReplaceAll(`From: Nathaniel Borenstein <nsb@bellcore.com>
212To: Ned Freed <ned@innosoft.com>
213Date: Sun, 21 Mar 1993 23:56:48 -0800 (PST)
214Subject: Sample message
216Content-type: multipart/mixed; boundary="simple boundary"
218This is the preamble. It is to be ignored, though it
219is a handy place for composition agents to include an
220explanatory note to non-MIME conformant readers.
224This is implicitly typed plain US-ASCII text.
225It does NOT end with a linebreak.
227Content-type: text/plain; charset=us-ascii
229This is explicitly typed plain US-ASCII text.
230It DOES end with a linebreak.
234This is the epilogue. It is also to be ignored.
// TestMime parses mimeMsg with Parse (strict set to true) and steps through
// its parts with ParseNextPart, checking each part's body, the io.EOF returned
// after the last part, the number of parts and their raw line counts.
237func TestMime(t *testing.T) {
239 r := strings.NewReader(mimeMsg)
240 p, err := Parse(xlog, true, r)
241 tcheck(t, err, "new reader")
// A multipart message must have a boundary recorded on the part.
242 if len(p.bound) == 0 {
243 t.Fatalf("got no bound, expected bound for mime message")
246 pp, err := p.ParseNextPart(xlog)
247 tcheck(t, err, "next part")
248 buf, err := io.ReadAll(pp.Reader())
249 tcheck(t, err, "read all")
// First part: the expected body has no trailing line break.
250 tcompare(t, string(buf), "This is implicitly typed plain US-ASCII text.\r\nIt does NOT end with a linebreak.")
252 pp, err = p.ParseNextPart(xlog)
253 tcheck(t, err, "next part")
254 buf, err = io.ReadAll(pp.Reader())
255 tcheck(t, err, "read all")
// Second part: the expected body ends with a line break.
256 tcompare(t, string(buf), "This is explicitly typed plain US-ASCII text.\r\nIt DOES end with a linebreak.\r\n")
// After the last part, ParseNextPart is expected to return io.EOF.
258 _, err = p.ParseNextPart(xlog)
259 tcompare(t, err, io.EOF)
261 if len(p.Parts) != 2 {
262 t.Fatalf("got %d parts, expected 2", len(p.Parts))
264 if p.Parts[0].RawLineCount != 2 {
265 t.Fatalf("got %d lines for first part, expected 2", p.Parts[0].RawLineCount)
267 if p.Parts[1].RawLineCount != 2 {
268 t.Fatalf("got %d lines for second part, expected 2", p.Parts[1].RawLineCount)
// TestLongLine builds an input of maxLineLength+1 bytes and checks that Parse
// (strict set to true) rejects it with errLineTooLong.
// NOTE(review): the body of the loop that fills the buffer is not visible in
// this excerpt.
272func TestLongLine(t *testing.T) {
273 line := make([]byte, maxLineLength+1)
274 for i := range line {
277 _, err := Parse(xlog, true, bytes.NewReader(line))
278 tfail(t, err, errLineTooLong)
// TestBareCrLf checks the handling of bare line feeds and bare carriage
// returns in headers and bodies. Bare LF is expected to fail with errBareLF
// even when non-strict. Bare CR is expected to fail with errBareCR when
// moxvar.Pedantic is set or when strict is true, and to be accepted otherwise.
// NOTE(review): moxvar.Pedantic is reset with a plain assignment; a t.Fatalf
// from a helper before the reset would leave it enabled for later tests.
281func TestBareCrLf(t *testing.T) {
// parse runs Parse with the given strictness and then walks the result.
// NOTE(review): the handling of Parse's error between these two lines is not
// visible in this excerpt.
282 parse := func(strict bool, s string) error {
283 p, err := Parse(xlog, strict, strings.NewReader(s))
287 return p.Walk(xlog, nil)
// Bare LF, first in the header and then in the body.
289 err := parse(false, "subject: test\ntest\r\n")
290 tfail(t, err, errBareLF)
291 err = parse(false, "\r\ntest\ntest\r\n")
292 tfail(t, err, errBareLF)
// Bare CR with Pedantic enabled, non-strict.
294 moxvar.Pedantic = true
295 err = parse(false, "subject: test\rtest\r\n")
296 tfail(t, err, errBareCR)
297 err = parse(false, "\r\ntest\rtest\r\n")
298 tfail(t, err, errBareCR)
299 moxvar.Pedantic = false
// Bare CR in strict mode.
301 err = parse(true, "subject: test\rtest\r\n")
302 tfail(t, err, errBareCR)
303 err = parse(true, "\r\ntest\rtest\r\n")
304 tfail(t, err, errBareCR)
// Bare CR in non-strict mode is accepted.
306 err = parse(false, "subject: test\rtest\r\n")
307 tcheck(t, err, "header with bare cr")
308 err = parse(false, "\r\ntest\rtest\r\n")
309 tcheck(t, err, "body with bare cr")
// TestMissingClosingBoundary parses a multipart/mixed message (boundary "x")
// in non-strict mode and expects errMissingClosingBoundary, once from a step
// that is not visible in this excerpt and once from Walk on a freshly parsed
// copy of the message.
// NOTE(review): the rest of the message text and the call that produces the
// first checked error are not visible here. The error from the second Parse
// call is discarded with "_".
312func TestMissingClosingBoundary(t *testing.T) {
313 message := strings.ReplaceAll(`Content-Type: multipart/mixed; boundary=x
319 msg, err := Parse(xlog, false, strings.NewReader(message))
320 tcheck(t, err, "new reader")
322 tfail(t, err, errMissingClosingBoundary)
324 msg, _ = Parse(xlog, false, strings.NewReader(message))
325 err = msg.Walk(xlog, nil)
326 tfail(t, err, errMissingClosingBoundary)
// TestHeaderEOF checks that Parse (non-strict) returns errUnexpectedEOF when
// the input ends in the middle of the header, with no line ending after the
// header line.
329func TestHeaderEOF(t *testing.T) {
330 message := "header: test"
331 _, err := Parse(xlog, false, strings.NewReader(message))
332 tfail(t, err, errUnexpectedEOF)
// TestBodyEOF checks that a body ending without a trailing line break is
// accepted: Parse (strict set to true) succeeds and the body reads back as
// "test" without error.
335func TestBodyEOF(t *testing.T) {
336 message := "header: test\r\n\r\ntest"
337 msg, err := Parse(xlog, true, strings.NewReader(message))
338 tcheck(t, err, "new reader")
339 buf, err := io.ReadAll(msg.Reader())
340 tcheck(t, err, "read body")
341 tcompare(t, string(buf), "test")
// TestWalk parses a multipart/related message that nests a
// multipart/alternative part holding a text/plain and a text/html part. It
// processes the message once with enforceSequential toggled on around a step
// reported as "walkmsg", and once with Walk on a freshly parsed copy. Both are
// expected to succeed in non-strict mode.
// NOTE(review): the call made while enforceSequential is true is not visible
// in this excerpt (presumably walkmsg(msg); confirm). The error from the
// second Parse call is discarded with "_".
344func TestWalk(t *testing.T) {
345 var message = strings.ReplaceAll(`Content-Type: multipart/related; boundary="----=_NextPart_afb3ad6f146b12b709deac3e387a3ad7"
347------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7
348Content-Type: multipart/alternative; boundary="----=_NextPart_afb3ad6f146b12b709deac3e387a3ad7_alt"
350------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7_alt
351Content-Type: text/plain; charset="utf-8"
352Content-Transfer-Encoding: 8bit
357------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7_alt
358Content-Type: text/html; charset="utf-8"
359Content-Transfer-Encoding: 8bit
363------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7_alt--
364------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7--
368 msg, err := Parse(xlog, false, strings.NewReader(message))
369 tcheck(t, err, "new reader")
370 enforceSequential = true
372 enforceSequential = false
375 tcheck(t, err, "walkmsg")
377 msg, _ = Parse(xlog, false, strings.NewReader(message))
378 err = msg.Walk(xlog, nil)
379 tcheck(t, err, "msg.Walk")
// TestNested parses a multipart/mixed message (boundary unique-boundary-1)
// whose parts include a nested multipart/parallel part, a text/enriched part
// and a message/rfc822 part. After processing the message with
// enforceSequential toggled on, it expects five top-level parts and checks
// that the fifth part exposes an embedded message through its Message field,
// whose body reads back as the expected ISO-8859-1 placeholder line. Finally
// it parses the message again in non-strict mode and runs Walk on it.
// NOTE(review): the boundary delimiter lines, the call made while
// enforceSequential is true, and the guards around the Fatalf calls for
// sub and the read error are not visible in this excerpt. The error from the
// last Parse call is discarded with "_".
382func TestNested(t *testing.T) {
384 nestedMessage := strings.ReplaceAll(`MIME-Version: 1.0
385From: Nathaniel Borenstein <nsb@nsb.fv.com>
386To: Ned Freed <ned@innosoft.com>
387Date: Fri, 07 Oct 1994 16:15:05 -0700 (PDT)
388Subject: A multipart example
389Content-Type: multipart/mixed;
390 boundary=unique-boundary-1
392This is the preamble area of a multipart message.
393Mail readers that understand multipart format
394should ignore this preamble.
396If you are reading this text, you might want to
397consider changing to a mail reader that understands
398how to properly display multipart messages.
402 ... Some text appears here ...
404[Note that the blank between the boundary and the start
405 of the text in this part means no header fields were
406 given and this is text in the US-ASCII character set.
407 It could have been done with explicit typing as in the
411Content-type: text/plain; charset=US-ASCII
413This could have been part of the previous part, but
414illustrates explicit versus implicit typing of body
418Content-Type: multipart/parallel; boundary=unique-boundary-2
421Content-Type: audio/basic
422Content-Transfer-Encoding: base64
426Content-Type: image/jpeg
427Content-Transfer-Encoding: base64
433Content-type: text/enriched
435This is <bold><italic>enriched.</italic></bold>
436<smaller>as defined in RFC 1896</smaller>
439<bigger><bigger>cool?</bigger></bigger>
442Content-Type: message/rfc822
444From: (mailbox in US-ASCII)
445To: (address in US-ASCII)
446Subject: (subject in US-ASCII)
447Content-Type: Text/plain; charset=ISO-8859-1
448Content-Transfer-Encoding: Quoted-printable
450 ... Additional text in ISO-8859-1 goes here ...
455 msg, err := Parse(xlog, true, strings.NewReader(nestedMessage))
456 tcheck(t, err, "new reader")
457 enforceSequential = true
459 enforceSequential = false
462 tcheck(t, err, "walkmsg")
464 if len(msg.Parts) != 5 {
465 t.Fatalf("got %d parts, expected 5", len(msg.Parts))
467 sub := msg.Parts[4].Message
469 t.Fatalf("missing part.Message")
471 buf, err := io.ReadAll(sub.Reader())
473 t.Fatalf("read message body: %v", err)
475 exp := " ... Additional text in ISO-8859-1 goes here ...\r\n"
476 if string(buf) != exp {
477 t.Fatalf("got %q, expected %q", buf, exp)
480 msg, _ = Parse(xlog, false, strings.NewReader(nestedMessage))
481 err = msg.Walk(xlog, nil)
482 tcheck(t, err, "msg.Walk")
// TestWalkdir creates the ham and spam training directories under
// ../testdata/train if needed, runs twalkdir over both, and logs how many of
// the parsed messages failed out of the total.
// NOTE(review): the return values of os.MkdirAll are ignored, and the
// declarations of n and nfail are not visible in this excerpt.
486func TestWalkdir(t *testing.T) {
487 // Ensure these dirs exist. Developers should bring their own ham/spam example
489 os.MkdirAll("../testdata/train/ham", 0770)
490 os.MkdirAll("../testdata/train/spam", 0770)
493 twalkdir(t, "../testdata/train/ham", &n, &nfail)
494 twalkdir(t, "../testdata/train/spam", &n, &nfail)
495 log.Printf("parsing messages: %d/%d failed", nfail, n)
// twalkdir lists the entries of dir and processes each one by its joined
// path, logging the path and error for entries that fail. n and nfail are
// counters supplied by the caller.
// NOTE(review): the action taken when the directory holds more than 1000
// entries, the per-entry call, and the counter updates are not visible in
// this excerpt. Presumably the list is capped and walk(p) is called; confirm.
498func twalkdir(t *testing.T, dir string, n, nfail *int) {
499 names, err := os.ReadDir(dir)
500 tcheck(t, err, "readdir")
501 if len(names) > 1000 {
504 for _, name := range names {
505 p := filepath.Join(dir, name.Name())
510 log.Printf("%s: %v", p, err)
// walk opens the file at path and parses it as a message with Parse in
// non-strict mode, returning an error.
// NOTE(review): the error handling, the closing of the file and the use made
// of the parsed message are not visible in this excerpt.
515func walk(path string) error {
516 r, err := os.Open(path)
521 msg, err := Parse(xlog, false, r)
// walkmsg processes msg and, through ParseNextPart, its sub parts recursively,
// while cross-checking the parser's bookkeeping. For a part without a
// boundary it reads the body sequentially, then reads the same byte range
// again through a section reader built from BodyOffset and EndOffset, and
// panics if the two reads differ or the offsets are inconsistent.
// It toggles the package-level enforceSequential flag while doing so.
// NOTE(review): several lines (error checks, the condition guarding the
// offset panic, the loop around ParseNextPart) are not visible in this
// excerpt, so the exact control flow cannot be confirmed from here.
528func walkmsg(msg *Part) error {
529 enforceSequential = true
531 enforceSequential = false
// No boundary: this part has a body of its own to read.
534 if len(msg.bound) == 0 {
535 buf, err := io.ReadAll(msg.Reader())
// An embedded message (message/rfc822 or message/global) is parsed again from
// the bytes just read.
540 if msg.MediaType == "MESSAGE" && (msg.MediaSubType == "RFC822" || msg.MediaSubType == "GLOBAL") {
541 mp, err := Parse(xlog, false, bytes.NewReader(buf))
549 size := msg.EndOffset - msg.BodyOffset
551 log.Printf("msg %v", msg)
552 panic("inconsistent body/end offset")
// Re-read the body via its recorded offsets and decode it the same way, then
// compare with the sequential read.
554 sr := io.NewSectionReader(msg.r, msg.BodyOffset, size)
555 decsr := msg.bodyReader(sr)
556 buf2, err := io.ReadAll(decsr)
561 if !bytes.Equal(buf, buf2) {
562 panic("data mismatch reading sequentially vs via offsets")
// Parts with a boundary: fetch sub parts and recurse into each.
569 pp, err := msg.ParseNextPart(xlog)
576 if err := walkmsg(pp); err != nil {
579 enforceSequential = true
// TestEmbedded opens the message-rfc822-multipart.eml test file and checks
// that EnsurePart parses it without error in non-strict mode, using the file
// size obtained from stat.
// NOTE(review): the stat call that produces fi, and any closing of f, are not
// visible in this excerpt.
583func TestEmbedded(t *testing.T) {
584 f, err := os.Open("../testdata/message/message-rfc822-multipart.eml")
585 tcheck(t, err, "open")
587 tcheck(t, err, "stat")
588 _, err = EnsurePart(xlog, false, f, fi.Size())
589 tcheck(t, err, "parse")
// TestEmbedded2 reads the message-rfc822-multipart2.eml test file into memory,
// replaces every "\n" with "\r\n", and passes the result to EnsurePart in
// non-strict mode.
// NOTE(review): the check made on the returned error lies beyond the end of
// this excerpt.
592func TestEmbedded2(t *testing.T) {
593 buf, err := os.ReadFile("../testdata/message/message-rfc822-multipart2.eml")
594 tcheck(t, err, "readfile")
595 buf = bytes.ReplaceAll(buf, []byte("\n"), []byte("\r\n"))
597 _, err = EnsurePart(xlog, false, bytes.NewReader(buf), int64(len(buf)))