14	"github.com/mjl-/mox/mlog"
 
// pkglog is the package-level logger shared by the tests in this file. Its
// Logger field is what gets passed to Parse, EnsurePart, Walk and
// ParseNextPart below.
17var pkglog = mlog.New("message", nil)
 
// tcheck is a test helper that aborts the test with "msg: err". The guard
// around the Fatalf call is not visible in this excerpt; presumably it only
// fires when err is non-nil (TODO confirm against the full file).
19func tcheck(t *testing.T, err error, msg string) {
 
22		t.Fatalf("%s: %s", msg, err)
 
// tcompare is a test helper that aborts the test when got and exp are not
// deeply equal according to reflect.DeepEqual, printing both values.
26func tcompare(t *testing.T, got, exp any) {
 
28	if !reflect.DeepEqual(got, exp) {
 
29		t.Fatalf("got %v, expected %v", got, exp)
 
// tfail is a test helper that checks err against the expected error expErr. It
// aborts the test when exactly one of the two is nil, or when expErr is non-nil
// and err does not match it according to errors.Is.
33func tfail(t *testing.T, err, expErr error) {
 
35	if (err == nil) != (expErr == nil) || expErr != nil && !errors.Is(err, expErr) {
 
36		t.Fatalf("got err %v, expected %v", err, expErr)
 
// TestEmptyHeader parses the input s with EnsurePart (second argument true),
// reads the body back and compares it with expBody, and checks that both
// MediaType and MediaSubType are the empty string. The definitions of s and
// expBody are on lines not visible in this excerpt.
40func TestEmptyHeader(t *testing.T) {
 
42	p, err := EnsurePart(pkglog.Logger, true, strings.NewReader(s), int64(len(s)))
 
43	tcheck(t, err, "parse empty headers")
 
44	buf, err := io.ReadAll(p.Reader())
 
45	tcheck(t, err, "read")
 
47	tcompare(t, string(buf), expBody)
 
48	tcompare(t, p.MediaType, "")
 
49	tcompare(t, p.MediaSubType, "")
 
// TestBadContentType runs EnsurePart on messages with a malformed content-type
// header and checks the returned error, the body read back, and the resulting
// MediaType/MediaSubType. Where ErrBadContentType is expected, the part is
// still readable and is typed APPLICATION/OCTET-STREAM. When no error is
// expected, "text/html;;" yields TEXT/HTML, while an unparseable value and
// "multipart/mixed;;" yield APPLICATION/OCTET-STREAM. Some setup lines
// (including the definition of expBody) are not visible in this excerpt.
52func TestBadContentType(t *testing.T) {
 
55	// Pedantic is like strict.
 
57	s := "content-type: text/html;;\r\n\r\ntest"
 
58	p, err := EnsurePart(pkglog.Logger, false, strings.NewReader(s), int64(len(s)))
 
59	tfail(t, err, ErrBadContentType)
 
	// Even though an error was returned, p is used: its body must be readable.
60	buf, err := io.ReadAll(p.Reader())
 
61	tcheck(t, err, "read")
 
62	tcompare(t, string(buf), expBody)
 
63	tcompare(t, p.MediaType, "APPLICATION")
 
64	tcompare(t, p.MediaSubType, "OCTET-STREAM")
 
	// Same input, now with the second argument to EnsurePart set to true; the
	// same error and fallback media type are expected.
68	s = "content-type: text/html;;\r\n\r\ntest"
 
69	p, err = EnsurePart(pkglog.Logger, true, strings.NewReader(s), int64(len(s)))
 
70	tfail(t, err, ErrBadContentType)
 
71	buf, err = io.ReadAll(p.Reader())
 
72	tcheck(t, err, "read")
 
73	tcompare(t, string(buf), expBody)
 
74	tcompare(t, p.MediaType, "APPLICATION")
 
75	tcompare(t, p.MediaSubType, "OCTET-STREAM")
 
77	// Non-strict but unrecoverable content-type.
 
78	s = "content-type: not a content type;;\r\n\r\ntest"
 
79	p, err = EnsurePart(pkglog.Logger, false, strings.NewReader(s), int64(len(s)))
 
80	tcheck(t, err, "parsing message with bad but recoverable content-type")
 
81	buf, err = io.ReadAll(p.Reader())
 
82	tcheck(t, err, "read")
 
83	tcompare(t, string(buf), expBody)
 
84	tcompare(t, p.MediaType, "APPLICATION")
 
85	tcompare(t, p.MediaSubType, "OCTET-STREAM")
 
87	// We try to use only the content-type, typically better than application/octet-stream.
 
88	s = "content-type: text/html;;\r\n\r\ntest"
 
89	p, err = EnsurePart(pkglog.Logger, false, strings.NewReader(s), int64(len(s)))
 
90	tcheck(t, err, "parsing message with bad but recoverable content-type")
 
91	buf, err = io.ReadAll(p.Reader())
 
92	tcheck(t, err, "read")
 
93	tcompare(t, string(buf), expBody)
 
94	tcompare(t, p.MediaType, "TEXT")
 
95	tcompare(t, p.MediaSubType, "HTML")
 
97	// Not recovering multipart, we won't have a boundary.
 
98	s = "content-type: multipart/mixed;;\r\n\r\ntest"
 
99	p, err = EnsurePart(pkglog.Logger, false, strings.NewReader(s), int64(len(s)))
 
100	tcheck(t, err, "parsing message with bad but recoverable content-type")
 
101	buf, err = io.ReadAll(p.Reader())
 
102	tcheck(t, err, "read")
 
103	tcompare(t, string(buf), expBody)
 
104	tcompare(t, p.MediaType, "APPLICATION")
 
105	tcompare(t, p.MediaSubType, "OCTET-STREAM")
 
// TestBareCR feeds EnsurePart a text/html message whose body contains a
// carriage return that is not followed by a line feed. It covers three
// situations: errBareCR from both EnsurePart and the body read; errBareCR from
// EnsurePart with a body read that succeeds; and no error at all, with the
// body returned unchanged. The lines that switch between the first and last
// situation (both pass false to EnsurePart) are not visible in this excerpt.
108func TestBareCR(t *testing.T) {
 
109	s := "content-type: text/html\r\n\r\nbare\rcr\r\n"
 
110	expBody := "bare\rcr\r\n"
 
112	// Pedantic is like strict.
 
114	p, err := EnsurePart(pkglog.Logger, false, strings.NewReader(s), int64(len(s)))
 
115	tfail(t, err, errBareCR)
 
116	_, err = io.ReadAll(p.Reader())
 
117	tfail(t, err, errBareCR)
 
	// With true as second argument, EnsurePart still reports errBareCR, but
	// reading the returned part is expected to succeed.
121	p, err = EnsurePart(pkglog.Logger, true, strings.NewReader(s), int64(len(s)))
 
122	tfail(t, err, errBareCR)
 
123	_, err = io.ReadAll(p.Reader())
 
124	tcheck(t, err, "read fallback part without error")
 
126	// Non-strict allows bare cr.
 
127	p, err = EnsurePart(pkglog.Logger, false, strings.NewReader(s), int64(len(s)))
 
128	tcheck(t, err, "parse")
 
129	buf, err := io.ReadAll(p.Reader())
 
130	tcheck(t, err, "read")
 
131	tcompare(t, string(buf), expBody)
 
// basicMsg is the fixture parsed by TestBasic: a text/plain message with a
// base64 content-transfer-encoding. It is built with strings.ReplaceAll; the
// body and the replacement arguments are on lines not visible in this excerpt.
134var basicMsg = strings.ReplaceAll(`From: <mjl@mox.example>
 
135Content-Type: text/plain
 
136Content-Transfer-Encoding: base64
 
// TestBasic parses basicMsg with Parse and checks that RawReader returns the
// body as it appears in the message ("aGkK\r\n"), that Reader returns the
// decoded body ("hi\r\n"), that RawLineCount is 1, and that
// EndOffset-BodyOffset equals the length of the raw body.
141func TestBasic(t *testing.T) {
 
142	r := strings.NewReader(basicMsg)
 
143	p, err := Parse(pkglog.Logger, true, r)
 
144	tcheck(t, err, "new reader")
 
146	buf, err := io.ReadAll(p.RawReader())
 
147	tcheck(t, err, "read raw")
 
148	expBody := "aGkK\r\n"
 
149	tcompare(t, string(buf), expBody)
 
151	buf, err = io.ReadAll(p.Reader())
 
152	tcheck(t, err, "read decoded")
 
153	tcompare(t, string(buf), "hi\r\n")
 
155	if p.RawLineCount != 1 {
 
156		t.Fatalf("basic message, got %d lines, expected 1", p.RawLineCount)
 
	// The size is measured on the raw (still encoded) body.
158	if size := p.EndOffset - p.BodyOffset; size != int64(len(expBody)) {
 
159		t.Fatalf("basic message, got size %d, expected %d", size, len(expBody))
 
// basicMsg2 is the fixture parsed by TestBasic2: a TEXT/PLAIN, US-ASCII
// message with Date, From, Subject, To and Message-Id headers. It is built
// with strings.ReplaceAll; some of its lines and the replacement arguments are
// not visible in this excerpt.
164var basicMsg2 = strings.ReplaceAll(`Date: Mon, 7 Feb 1994 21:52:25 -0800 (PST)
 
165From: Fred Foobar <foobar@Blurdybloop.example>
 
166Subject: afternoon meeting
 
167To: mooch@owatagu.siam.edu.example
 
168Message-Id: <B27397-0100000@Blurdybloop.example>
 
170Content-Type: TEXT/PLAIN; CHARSET=US-ASCII
 
172Hello Joe, do you think we can meet at 3:30 tomorrow?
 
// TestBasic2 parses basicMsg2 and checks that RawReader and Reader both return
// the same body, that RawLineCount is 2, and that EndOffset-BodyOffset equals
// the body length. It then parses the message again, calls Walk instead of
// reading the body explicitly, and repeats the line-count and size checks.
176func TestBasic2(t *testing.T) {
 
177	r := strings.NewReader(basicMsg2)
 
178	p, err := Parse(pkglog.Logger, true, r)
 
179	tcheck(t, err, "new reader")
 
181	buf, err := io.ReadAll(p.RawReader())
 
182	tcheck(t, err, "read raw")
 
183	expBody := "Hello Joe, do you think we can meet at 3:30 tomorrow?\r\n\r\n"
 
184	tcompare(t, string(buf), expBody)
 
186	buf, err = io.ReadAll(p.Reader())
 
187	tcheck(t, err, "read decoded")
 
188	tcompare(t, string(buf), expBody)
 
190	if p.RawLineCount != 2 {
 
191		t.Fatalf("basic message, got %d lines, expected 2", p.RawLineCount)
 
193	if size := p.EndOffset - p.BodyOffset; size != int64(len(expBody)) {
 
194		t.Fatalf("basic message, got size %d, expected %d", size, len(expBody))
 
	// Second pass: a fresh parse followed by Walk must give the same line count
	// and offsets as reading the body explicitly.
197	r = strings.NewReader(basicMsg2)
 
198	p, err = Parse(pkglog.Logger, true, r)
 
199	tcheck(t, err, "new reader")
 
200	err = p.Walk(pkglog.Logger, nil)
 
201	tcheck(t, err, "walk")
 
202	if p.RawLineCount != 2 {
 
203		t.Fatalf("basic message, got %d lines, expected 2", p.RawLineCount)
 
205	if size := p.EndOffset - p.BodyOffset; size != int64(len(expBody)) {
 
206		t.Fatalf("basic message, got size %d, expected %d", size, len(expBody))
 
// mimeMsg is the fixture parsed by TestMime: a multipart/mixed message with
// boundary "simple boundary", containing a preamble, an implicitly typed text
// part, an explicitly typed text/plain part, and an epilogue. The boundary
// lines and the strings.ReplaceAll arguments are not visible in this excerpt.
210var mimeMsg = strings.ReplaceAll(`From: Nathaniel Borenstein <nsb@bellcore.com>
 
211To: Ned Freed <ned@innosoft.com>
 
212Date: Sun, 21 Mar 1993 23:56:48 -0800 (PST)
 
213Subject: Sample message
 
215Content-type: multipart/mixed; boundary="simple boundary"
 
217This is the preamble.  It is to be ignored, though it
 
218is a handy place for composition agents to include an
 
219explanatory note to non-MIME conformant readers.
 
223This is implicitly typed plain US-ASCII text.
 
224It does NOT end with a linebreak.
 
226Content-type: text/plain; charset=us-ascii
 
228This is explicitly typed plain US-ASCII text.
 
229It DOES end with a linebreak.
 
233This is the epilogue.  It is also to be ignored.
 
// TestMime parses mimeMsg and requires that the top-level part has a non-empty
// bound. It reads two sub parts with ParseNextPart and compares their bodies
// (the first has no trailing line break, the second does). A third
// ParseNextPart call must return io.EOF. Finally it checks that p.Parts holds
// two parts and that each has a RawLineCount of 2.
236func TestMime(t *testing.T) {
 
238	r := strings.NewReader(mimeMsg)
 
239	p, err := Parse(pkglog.Logger, true, r)
 
240	tcheck(t, err, "new reader")
 
241	if len(p.bound) == 0 {
 
242		t.Fatalf("got no bound, expected bound for mime message")
 
245	pp, err := p.ParseNextPart(pkglog.Logger)
 
246	tcheck(t, err, "next part")
 
247	buf, err := io.ReadAll(pp.Reader())
 
248	tcheck(t, err, "read all")
 
249	tcompare(t, string(buf), "This is implicitly typed plain US-ASCII text.\r\nIt does NOT end with a linebreak.")
 
251	pp, err = p.ParseNextPart(pkglog.Logger)
 
252	tcheck(t, err, "next part")
 
253	buf, err = io.ReadAll(pp.Reader())
 
254	tcheck(t, err, "read all")
 
255	tcompare(t, string(buf), "This is explicitly typed plain US-ASCII text.\r\nIt DOES end with a linebreak.\r\n")
 
	// No parts are left after the second one.
257	_, err = p.ParseNextPart(pkglog.Logger)
 
258	tcompare(t, err, io.EOF)
 
260	if len(p.Parts) != 2 {
 
261		t.Fatalf("got %d parts, expected 2", len(p.Parts))
 
263	if p.Parts[0].RawLineCount != 2 {
 
264		t.Fatalf("got %d lines for first part, expected 2", p.Parts[0].RawLineCount)
 
266	if p.Parts[1].RawLineCount != 2 {
 
267		t.Fatalf("got %d lines for second part, expected 2", p.Parts[1].RawLineCount)
 
// TestLongLine builds a buffer of maxLineLength+1 bytes and expects Parse to
// fail with errLineTooLong on it. The loop body that fills the buffer is not
// visible in this excerpt.
271func TestLongLine(t *testing.T) {
 
272	line := make([]byte, maxLineLength+1)
 
273	for i := range line {
 
276	_, err := Parse(pkglog.Logger, true, bytes.NewReader(line))
 
277	tfail(t, err, errLineTooLong)
 
// TestBareCrLf checks handling of a line feed without a preceding carriage
// return, and of a carriage return without a following line feed, in both
// header and body. The local parse helper calls Parse with the given strict
// flag and then Walk. A bare line feed must give errBareLF. A bare carriage
// return must give errBareCR in the middle groups and be accepted in the last
// group. The statements that change state between the groups are not visible
// in this excerpt (presumably toggling a pedantic mode; confirm in the full
// file).
280func TestBareCrLf(t *testing.T) {
 
281	parse := func(strict bool, s string) error {
 
282		p, err := Parse(pkglog.Logger, strict, strings.NewReader(s))
 
286		return p.Walk(pkglog.Logger, nil)
 
288	err := parse(false, "subject: test\ntest\r\n")
 
289	tfail(t, err, errBareLF)
 
290	err = parse(false, "\r\ntest\ntest\r\n")
 
291	tfail(t, err, errBareLF)
 
294	err = parse(false, "subject: test\rtest\r\n")
 
295	tfail(t, err, errBareCR)
 
296	err = parse(false, "\r\ntest\rtest\r\n")
 
297	tfail(t, err, errBareCR)
 
300	err = parse(true, "subject: test\rtest\r\n")
 
301	tfail(t, err, errBareCR)
 
302	err = parse(true, "\r\ntest\rtest\r\n")
 
303	tfail(t, err, errBareCR)
 
305	err = parse(false, "subject: test\rtest\r\n")
 
306	tcheck(t, err, "header with bare cr")
 
307	err = parse(false, "\r\ntest\rtest\r\n")
 
308	tcheck(t, err, "body with bare cr")
 
// TestMissingClosingBoundary parses a multipart/mixed message with boundary
// "x" and expects errMissingClosingBoundary from two ways of traversing it.
// The second traversal is msg.Walk on a freshly parsed message. The statement
// producing the error for the first check, and the rest of the message text,
// are not visible in this excerpt.
311func TestMissingClosingBoundary(t *testing.T) {
 
312	message := strings.ReplaceAll(`Content-Type: multipart/mixed; boundary=x
 
318	msg, err := Parse(pkglog.Logger, false, strings.NewReader(message))
 
319	tcheck(t, err, "new reader")
 
321	tfail(t, err, errMissingClosingBoundary)
 
323	msg, _ = Parse(pkglog.Logger, false, strings.NewReader(message))
 
324	err = msg.Walk(pkglog.Logger, nil)
 
325	tfail(t, err, errMissingClosingBoundary)
 
// TestHeaderEOF expects Parse to fail with errUnexpectedEOF when the input ends
// in the middle of the header, without any line ending.
328func TestHeaderEOF(t *testing.T) {
 
329	message := "header: test"
 
330	_, err := Parse(pkglog.Logger, false, strings.NewReader(message))
 
331	tfail(t, err, errUnexpectedEOF)
 
// TestBodyEOF checks that a body which ends without a trailing line ending is
// parsed without error and read back in full ("test").
334func TestBodyEOF(t *testing.T) {
 
335	message := "header: test\r\n\r\ntest"
 
336	msg, err := Parse(pkglog.Logger, true, strings.NewReader(message))
 
337	tcheck(t, err, "new reader")
 
338	buf, err := io.ReadAll(msg.Reader())
 
339	tcheck(t, err, "read body")
 
340	tcompare(t, string(buf), "test")
 
// TestWalk parses a multipart/related message that contains a nested
// multipart/alternative part with a text/plain and a text/html sub part. The
// inner boundary is the outer boundary plus an "_alt" suffix. The message is
// checked twice: once with enforceSequential set to true (the call whose error
// is checked as "walkmsg" is not visible in this excerpt), and once with
// msg.Walk on a freshly parsed message.
343func TestWalk(t *testing.T) {
 
344	var message = strings.ReplaceAll(`Content-Type: multipart/related; boundary="----=_NextPart_afb3ad6f146b12b709deac3e387a3ad7"
 
346------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7
 
347Content-Type: multipart/alternative; boundary="----=_NextPart_afb3ad6f146b12b709deac3e387a3ad7_alt"
 
349------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7_alt
 
350Content-Type: text/plain; charset="utf-8"
 
351Content-Transfer-Encoding: 8bit
 
356------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7_alt
 
357Content-Type: text/html; charset="utf-8"
 
358Content-Transfer-Encoding: 8bit
 
362------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7_alt--
 
363------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7--
 
367	msg, err := Parse(pkglog.Logger, false, strings.NewReader(message))
 
368	tcheck(t, err, "new reader")
 
369	enforceSequential = true
 
371		enforceSequential = false
 
374	tcheck(t, err, "walkmsg")
 
376	msg, _ = Parse(pkglog.Logger, false, strings.NewReader(message))
 
377	err = msg.Walk(pkglog.Logger, nil)
 
378	tcheck(t, err, "msg.Walk")
 
// TestNested parses a multipart/mixed message (boundary unique-boundary-1)
// that contains, among others, a multipart/parallel part, a text/enriched part
// and a message/rfc822 part. After a check with enforceSequential set to true
// (the call whose error is checked as "walkmsg" is not visible in this
// excerpt), it expects five top-level parts, requires Message to be set on the
// fifth, and compares the body of that embedded message. It ends with
// msg.Walk on a freshly parsed, non-strict copy of the message.
381func TestNested(t *testing.T) {
 
383	nestedMessage := strings.ReplaceAll(`MIME-Version: 1.0
 
384From: Nathaniel Borenstein <nsb@nsb.fv.com>
 
385To: Ned Freed <ned@innosoft.com>
 
386Date: Fri, 07 Oct 1994 16:15:05 -0700 (PDT)
 
387Subject: A multipart example
 
388Content-Type: multipart/mixed;
 
389              boundary=unique-boundary-1
 
391This is the preamble area of a multipart message.
 
392Mail readers that understand multipart format
 
393should ignore this preamble.
 
395If you are reading this text, you might want to
 
396consider changing to a mail reader that understands
 
397how to properly display multipart messages.
 
401  ... Some text appears here ...
 
403[Note that the blank between the boundary and the start
 
404 of the text in this part means no header fields were
 
405 given and this is text in the US-ASCII character set.
 
406 It could have been done with explicit typing as in the
 
410Content-type: text/plain; charset=US-ASCII
 
412This could have been part of the previous part, but
 
413illustrates explicit versus implicit typing of body
 
417Content-Type: multipart/parallel; boundary=unique-boundary-2
 
420Content-Type: audio/basic
 
421Content-Transfer-Encoding: base64
 
425Content-Type: image/jpeg
 
426Content-Transfer-Encoding: base64
 
432Content-type: text/enriched
 
434This is <bold><italic>enriched.</italic></bold>
 
435<smaller>as defined in RFC 1896</smaller>
 
438<bigger><bigger>cool?</bigger></bigger>
 
441Content-Type: message/rfc822
 
443From: (mailbox in US-ASCII)
 
444To: (address in US-ASCII)
 
445Subject: (subject in US-ASCII)
 
446Content-Type: Text/plain; charset=ISO-8859-1
 
447Content-Transfer-Encoding: Quoted-printable
 
449  ... Additional text in ISO-8859-1 goes here ...
 
454	msg, err := Parse(pkglog.Logger, true, strings.NewReader(nestedMessage))
 
455	tcheck(t, err, "new reader")
 
456	enforceSequential = true
 
458		enforceSequential = false
 
461	tcheck(t, err, "walkmsg")
 
463	if len(msg.Parts) != 5 {
 
464		t.Fatalf("got %d parts, expected 5", len(msg.Parts))
 
	// The fifth part is the message/rfc822 part; its parsed form is in Message.
466	sub := msg.Parts[4].Message
 
468		t.Fatalf("missing part.Message")
 
470	buf, err := io.ReadAll(sub.Reader())
 
472		t.Fatalf("read message body: %v", err)
 
474	exp := "  ... Additional text in ISO-8859-1 goes here ...\r\n"
 
475	if string(buf) != exp {
 
476		t.Fatalf("got %q, expected %q", buf, exp)
 
479	msg, _ = Parse(pkglog.Logger, false, strings.NewReader(nestedMessage))
 
480	err = msg.Walk(pkglog.Logger, nil)
 
481	tcheck(t, err, "msg.Walk")
 
// TestWalkdir runs twalkdir over ../testdata/train/ham and
// ../testdata/train/spam, creating both directories first so the test also
// works when no example messages are present. It logs how many of the messages
// failed to parse. The declarations of n and nfail are not visible in this
// excerpt.
485func TestWalkdir(t *testing.T) {
 
486	// Ensure these dirs exist. Developers should bring their own ham/spam example
 
488	os.MkdirAll("../testdata/train/ham", 0770)
 
489	os.MkdirAll("../testdata/train/spam", 0770)
 
492	twalkdir(t, "../testdata/train/ham", &n, &nfail)
 
493	twalkdir(t, "../testdata/train/spam", &n, &nfail)
 
494	log.Printf("parsing messages: %d/%d failed", nfail, n)
 
// twalkdir reads the entries of dir, fails the test if the directory cannot be
// read, and processes each entry by its full path, logging the path and error
// when processing fails. n and nfail are counters owned by the caller;
// presumably they count processed and failed messages, but the lines updating
// them are not visible here. What happens when there are more than 1000
// entries is also not visible.
497func twalkdir(t *testing.T, dir string, n, nfail *int) {
 
498	names, err := os.ReadDir(dir)
 
499	tcheck(t, err, "readdir")
 
500	if len(names) > 1000 {
 
503	for _, name := range names {
 
504		p := filepath.Join(dir, name.Name())
 
509			log.Printf("%s: %v", p, err)
 
// walk opens the file at path and parses it as a message with Parse in
// non-strict mode. Error handling and what is done with the parsed message are
// on lines not visible in this excerpt.
514func walk(path string) error {
 
515	r, err := os.Open(path)
 
520	msg, err := Parse(pkglog.Logger, false, r)
 
// walkmsg checks a parsed part for consistency, with enforceSequential set to
// true while it runs.
//
// For a part without a bound it reads the whole body through msg.Reader. If
// the part is MESSAGE/RFC822 or MESSAGE/GLOBAL it also parses that body as a
// message. It then reads the body a second time through a section reader over
// msg.r spanning BodyOffset to EndOffset, and panics if the two reads differ.
// It also panics on an inconsistent body/end offset; the exact condition is
// not visible in this excerpt.
//
// The remaining statements fetch sub parts with ParseNextPart and call walkmsg
// on each one. The surrounding control flow and the error handling are not
// visible in this excerpt.
527func walkmsg(msg *Part) error {
 
528	enforceSequential = true
 
530		enforceSequential = false
 
533	if len(msg.bound) == 0 {
 
534		buf, err := io.ReadAll(msg.Reader())
 
539		if msg.MediaType == "MESSAGE" && (msg.MediaSubType == "RFC822" || msg.MediaSubType == "GLOBAL") {
 
540			mp, err := Parse(pkglog.Logger, false, bytes.NewReader(buf))
 
548		size := msg.EndOffset - msg.BodyOffset
 
550			log.Printf("msg %v", msg)
 
551			panic("inconsistent body/end offset")
 
		// Second read of the same body, this time addressed by offsets, to
		// compare with the read above.
553		sr := io.NewSectionReader(msg.r, msg.BodyOffset, size)
 
554		decsr := msg.bodyReader(sr)
 
555		buf2, err := io.ReadAll(decsr)
 
560		if !bytes.Equal(buf, buf2) {
 
561			panic("data mismatch reading sequentially vs via offsets")
 
568		pp, err := msg.ParseNextPart(pkglog.Logger)
 
575		if err := walkmsg(pp); err != nil {
 
		// Set again after the recursive walkmsg call above.
578		enforceSequential = true
 
// TestEmbedded opens ../testdata/message/message-rfc822-multipart.eml and
// requires EnsurePart, called with the file and its size, to succeed. The stat
// call that produces fi is not visible in this excerpt.
582func TestEmbedded(t *testing.T) {
 
583	f, err := os.Open("../testdata/message/message-rfc822-multipart.eml")
 
584	tcheck(t, err, "open")
 
586	tcheck(t, err, "stat")
 
587	_, err = EnsurePart(pkglog.Logger, false, f, fi.Size())
 
588	tcheck(t, err, "parse")
 
// TestEmbedded2 reads ../testdata/message/message-rfc822-multipart2.eml,
// replaces every "\n" with "\r\n", and runs EnsurePart on the result. The check
// on the returned error is not visible in this excerpt.
591func TestEmbedded2(t *testing.T) {
 
592	buf, err := os.ReadFile("../testdata/message/message-rfc822-multipart2.eml")
 
593	tcheck(t, err, "readfile")
 
594	buf = bytes.ReplaceAll(buf, []byte("\n"), []byte("\r\n"))
 
596	_, err = EnsurePart(pkglog.Logger, false, bytes.NewReader(buf), int64(len(buf)))
 
// TestNetMailAddress parses a From header whose local part is a quoted single
// space, and expects the envelope to hold one address with an empty first
// field, `" "` (quotes included) as second field, and "example.com" as third.
600func TestNetMailAddress(t *testing.T) {
 
601	const s = "From: \" \"@example.com\r\n\r\nbody\r\n"
 
602	p, err := EnsurePart(pkglog.Logger, false, strings.NewReader(s), int64(len(s)))
 
603	tcheck(t, err, "parse")
 
604	tcompare(t, p.Envelope.From, []Address{{"", `" "`, "example.com"}})
 
// TestParseQuotedCharset parses a From header whose display name is an encoded
// word in iso-8859-2 with Q encoding, and expects the envelope address to
// carry the decoded name "Kristýna".
607func TestParseQuotedCharset(t *testing.T) {
 
608	const s = "From: =?iso-8859-2?Q?Krist=FDna?= <k@example.com>\r\n\r\nbody\r\n"
 
609	p, err := EnsurePart(pkglog.Logger, false, strings.NewReader(s), int64(len(s)))
 
610	tcheck(t, err, "parse")
 
611	tcompare(t, p.Envelope.From, []Address{{"Kristýna", "k", "example.com"}})