1package imapserver
2
3import (
4 "fmt"
5 "net/textproto"
6 "strings"
7
8 "github.com/mjl-/bstore"
9
10 "github.com/mjl-/mox/message"
11 "github.com/mjl-/mox/mlog"
12 "github.com/mjl-/mox/store"
13)
14
15// Search returns messages matching criteria specified in parameters.
16//
17// State: Selected
18func (c *conn) cmdxSearch(isUID bool, tag, cmd string, p *parser) {
19 // Command: ../rfc/9051:3716 ../rfc/4731:31 ../rfc/4466:354 ../rfc/3501:2723
20 // Examples: ../rfc/9051:3986 ../rfc/4731:153 ../rfc/3501:2975
21 // Syntax: ../rfc/9051:6918 ../rfc/4466:611 ../rfc/3501:4954
22
23 // We will respond with ESEARCH instead of SEARCH if "RETURN" is present or for IMAP4rev2.
24 var eargs map[string]bool // Options except SAVE. Nil means old-style SEARCH response.
25 var save bool // For SAVE option. Kept separately for easier handling of MIN/MAX later.
26
27 // IMAP4rev2 always returns ESEARCH, even with absent RETURN.
28 if c.enabled[capIMAP4rev2] {
29 eargs = map[string]bool{}
30 }
31 // ../rfc/9051:6967
32 if p.take(" RETURN (") {
33 eargs = map[string]bool{}
34
35 for !p.take(")") {
36 if len(eargs) > 0 || save {
37 p.xspace()
38 }
39 if w, ok := p.takelist("MIN", "MAX", "ALL", "COUNT", "SAVE"); ok {
40 if w == "SAVE" {
41 save = true
42 } else {
43 eargs[w] = true
44 }
45 } else {
46 // ../rfc/4466:378 ../rfc/9051:3745
47 xsyntaxErrorf("ESEARCH result option %q not supported", w)
48 }
49 }
50 }
51 // ../rfc/4731:149 ../rfc/9051:3737
52 if eargs != nil && len(eargs) == 0 && !save {
53 eargs["ALL"] = true
54 }
55
56 // If UTF8=ACCEPT is enabled, we should not accept any charset. We are a bit more
57 // relaxed (reasonable?) and still allow US-ASCII and UTF-8. ../rfc/6855:198
58 if p.take(" CHARSET ") {
59 charset := strings.ToUpper(p.xastring())
60 if charset != "US-ASCII" && charset != "UTF-8" {
61 // ../rfc/3501:2771 ../rfc/9051:3836
62 xusercodeErrorf("BADCHARSET", "only US-ASCII and UTF-8 supported")
63 }
64 }
65 p.xspace()
66 sk := &searchKey{
67 searchKeys: []searchKey{*p.xsearchKey()},
68 }
69 for !p.empty() {
70 p.xspace()
71 sk.searchKeys = append(sk.searchKeys, *p.xsearchKey())
72 }
73
74 // Even in case of error, we ensure search result is changed.
75 if save {
76 c.searchResult = []store.UID{}
77 }
78
79 // We gather word and not-word searches from the top-level, turn them
80 // into a WordSearch for a more efficient search.
81 // todo optimize: also gather them out of AND searches.
82 var textWords, textNotWords, bodyWords, bodyNotWords []string
83 n := 0
84 for _, xsk := range sk.searchKeys {
85 switch xsk.op {
86 case "BODY":
87 bodyWords = append(bodyWords, xsk.astring)
88 continue
89 case "TEXT":
90 textWords = append(textWords, xsk.astring)
91 continue
92 case "NOT":
93 switch xsk.searchKey.op {
94 case "BODY":
95 bodyNotWords = append(bodyNotWords, xsk.searchKey.astring)
96 continue
97 case "TEXT":
98 textNotWords = append(textNotWords, xsk.searchKey.astring)
99 continue
100 }
101 }
102 sk.searchKeys[n] = xsk
103 n++
104 }
105 // We may be left with an empty but non-nil sk.searchKeys, which is important for
106 // matching.
107 sk.searchKeys = sk.searchKeys[:n]
108 var bodySearch, textSearch *store.WordSearch
109 if len(bodyWords) > 0 || len(bodyNotWords) > 0 {
110 ws := store.PrepareWordSearch(bodyWords, bodyNotWords)
111 bodySearch = &ws
112 }
113 if len(textWords) > 0 || len(textNotWords) > 0 {
114 ws := store.PrepareWordSearch(textWords, textNotWords)
115 textSearch = &ws
116 }
117
118 // Note: we only hold the account rlock for verifying the mailbox at the start.
119 c.account.RLock()
120 runlock := c.account.RUnlock
121 // Note: in a defer because we replace it below.
122 defer func() {
123 runlock()
124 }()
125
126 // If we only have a MIN and/or MAX, we can stop processing as soon as we
127 // have those matches.
128 var min, max int
129 if eargs["MIN"] {
130 min = 1
131 }
132 if eargs["MAX"] {
133 max = 1
134 }
135
136 var expungeIssued bool
137 var maxModSeq store.ModSeq
138
139 var uids []store.UID
140 c.xdbread(func(tx *bstore.Tx) {
141 c.xmailboxID(tx, c.mailboxID) // Validate.
142 runlock()
143 runlock = func() {}
144
145 // Normal forward search when we don't have MAX only.
146 var lastIndex = -1
147 if eargs == nil || max == 0 || len(eargs) != 1 {
148 for i, uid := range c.uids {
149 lastIndex = i
150 if match, modseq := c.searchMatch(tx, msgseq(i+1), uid, *sk, bodySearch, textSearch, &expungeIssued); match {
151 uids = append(uids, uid)
152 if modseq > maxModSeq {
153 maxModSeq = modseq
154 }
155 if min == 1 && min+max == len(eargs) {
156 break
157 }
158 }
159 }
160 }
161 // And reverse search for MAX if we have only MAX or MAX combined with MIN.
162 if max == 1 && (len(eargs) == 1 || min+max == len(eargs)) {
163 for i := len(c.uids) - 1; i > lastIndex; i-- {
164 if match, modseq := c.searchMatch(tx, msgseq(i+1), c.uids[i], *sk, bodySearch, textSearch, &expungeIssued); match {
165 uids = append(uids, c.uids[i])
166 if modseq > maxModSeq {
167 maxModSeq = modseq
168 }
169 break
170 }
171 }
172 }
173 })
174
175 if eargs == nil {
176 // In IMAP4rev1, an untagged SEARCH response is required. ../rfc/3501:2728
177 if len(uids) == 0 {
178 c.bwritelinef("* SEARCH")
179 }
180
181 // Old-style SEARCH response. We must spell out each number. So we may be splitting
182 // into multiple responses. ../rfc/9051:6809 ../rfc/3501:4833
183 for len(uids) > 0 {
184 n := len(uids)
185 if n > 100 {
186 n = 100
187 }
188 s := ""
189 for _, v := range uids[:n] {
190 if !isUID {
191 v = store.UID(c.xsequence(v))
192 }
193 s += " " + fmt.Sprintf("%d", v)
194 }
195
196 // Since we don't have the max modseq for the possibly partial uid range we're
197 // writing here within hand reach, we conveniently interpret the ambiguous "for all
198 // messages being returned" in ../rfc/7162:1107 as meaning over all lines that we
199 // write. And that clients only commit this value after they have seen the tagged
200 // end of the command. Appears to be recommended behaviour, ../rfc/7162:2323.
201 // ../rfc/7162:1077 ../rfc/7162:1101
202 var modseq string
203 if sk.hasModseq() {
204 // ../rfc/7162:2557
205 modseq = fmt.Sprintf(" (MODSEQ %d)", maxModSeq.Client())
206 }
207
208 c.bwritelinef("* SEARCH%s%s", s, modseq)
209 uids = uids[n:]
210 }
211 } else {
212 // New-style ESEARCH response syntax: ../rfc/9051:6546 ../rfc/4466:522
213
214 if save {
215 // ../rfc/9051:3784 ../rfc/5182:13
216 c.searchResult = uids
217 if sanityChecks {
218 checkUIDs(c.searchResult)
219 }
220 }
221
222 // No untagged ESEARCH response if nothing was requested. ../rfc/9051:4160
223 if len(eargs) > 0 {
224 resp := fmt.Sprintf("* ESEARCH (TAG %s)", tag)
225 if isUID {
226 resp += " UID"
227 }
228
229 // NOTE: we are converting UIDs to msgseq in the uids slice (if needed) while
230 // keeping the "uids" name!
231 if !isUID {
232 // If searchResult is hanging on to the slice, we need to work on a copy.
233 if save {
234 nuids := make([]store.UID, len(uids))
235 copy(nuids, uids)
236 uids = nuids
237 }
238 for i, uid := range uids {
239 uids[i] = store.UID(c.xsequence(uid))
240 }
241 }
242
243 // If no matches, then no MIN/MAX response. ../rfc/4731:98 ../rfc/9051:3758
244 if eargs["MIN"] && len(uids) > 0 {
245 resp += fmt.Sprintf(" MIN %d", uids[0])
246 }
247 if eargs["MAX"] && len(uids) > 0 {
248 resp += fmt.Sprintf(" MAX %d", uids[len(uids)-1])
249 }
250 if eargs["COUNT"] {
251 resp += fmt.Sprintf(" COUNT %d", len(uids))
252 }
253 if eargs["ALL"] && len(uids) > 0 {
254 resp += fmt.Sprintf(" ALL %s", compactUIDSet(uids).String())
255 }
256
257 // Interaction between ESEARCH and CONDSTORE: ../rfc/7162:1211 ../rfc/4731:273
258 // Summary: send the highest modseq of the returned messages.
259 if sk.hasModseq() && len(uids) > 0 {
260 resp += fmt.Sprintf(" MODSEQ %d", maxModSeq.Client())
261 }
262
263 c.bwritelinef("%s", resp)
264 }
265 }
266 if expungeIssued {
267 // ../rfc/9051:5102
268 c.writeresultf("%s OK [EXPUNGEISSUED] done", tag)
269 } else {
270 c.ok(tag, cmd)
271 }
272}
273
274type search struct {
275 c *conn
276 tx *bstore.Tx
277 seq msgseq
278 uid store.UID
279 mr *store.MsgReader
280 m store.Message
281 p *message.Part
282 expungeIssued *bool
283 hasModseq bool
284}
285
286func (c *conn) searchMatch(tx *bstore.Tx, seq msgseq, uid store.UID, sk searchKey, bodySearch, textSearch *store.WordSearch, expungeIssued *bool) (bool, store.ModSeq) {
287 s := search{c: c, tx: tx, seq: seq, uid: uid, expungeIssued: expungeIssued, hasModseq: sk.hasModseq()}
288 defer func() {
289 if s.mr != nil {
290 err := s.mr.Close()
291 c.xsanity(err, "closing messagereader")
292 s.mr = nil
293 }
294 }()
295 return s.match(sk, bodySearch, textSearch)
296}
297
298func (s *search) match(sk searchKey, bodySearch, textSearch *store.WordSearch) (match bool, modseq store.ModSeq) {
299 // Instead of littering all the cases in match0 with calls to get modseq, we do it once
300 // here in case of a match.
301 defer func() {
302 if match && s.hasModseq {
303 if s.m.ID == 0 {
304 match = s.xensureMessage()
305 }
306 modseq = s.m.ModSeq
307 }
308 }()
309
310 match = s.match0(sk)
311 if match && bodySearch != nil {
312 if !s.xensurePart() {
313 match = false
314 return
315 }
316 var err error
317 match, err = bodySearch.MatchPart(s.c.log, s.p, false)
318 xcheckf(err, "search words in bodies")
319 }
320 if match && textSearch != nil {
321 if !s.xensurePart() {
322 match = false
323 return
324 }
325 var err error
326 match, err = textSearch.MatchPart(s.c.log, s.p, true)
327 xcheckf(err, "search words in headers and bodies")
328 }
329 return
330}
331
332func (s *search) xensureMessage() bool {
333 if s.m.ID > 0 {
334 return true
335 }
336
337 q := bstore.QueryTx[store.Message](s.tx)
338 q.FilterNonzero(store.Message{MailboxID: s.c.mailboxID, UID: s.uid})
339 m, err := q.Get()
340 if err == bstore.ErrAbsent || err == nil && m.Expunged {
341 // ../rfc/2180:607
342 *s.expungeIssued = true
343 return false
344 }
345 xcheckf(err, "get message")
346 s.m = m
347 return true
348}
349
350// ensure message, reader and part are loaded. returns whether that was
351// successful.
352func (s *search) xensurePart() bool {
353 if s.mr != nil {
354 return s.p != nil
355 }
356
357 if !s.xensureMessage() {
358 return false
359 }
360
361 // Closed by searchMatch after all (recursive) search.match calls are finished.
362 s.mr = s.c.account.MessageReader(s.m)
363
364 if s.m.ParsedBuf == nil {
365 s.c.log.Error("missing parsed message")
366 return false
367 }
368 p, err := s.m.LoadPart(s.mr)
369 xcheckf(err, "load parsed message")
370 s.p = &p
371 return true
372}
373
374func (s *search) match0(sk searchKey) bool {
375 c := s.c
376
377 // Difference between sk.searchKeys nil and length 0 is important. Because we take
378 // out word/notword searches, the list may be empty but non-nil.
379 if sk.searchKeys != nil {
380 for _, ssk := range sk.searchKeys {
381 if !s.match0(ssk) {
382 return false
383 }
384 }
385 return true
386 } else if sk.seqSet != nil {
387 return sk.seqSet.containsSeq(s.seq, c.uids, c.searchResult)
388 }
389
390 filterHeader := func(field, value string) bool {
391 lower := strings.ToLower(value)
392 h, err := s.p.Header()
393 if err != nil {
394 c.log.Debugx("parsing message header", err, mlog.Field("uid", s.uid))
395 return false
396 }
397 for _, v := range h.Values(field) {
398 if strings.Contains(strings.ToLower(v), lower) {
399 return true
400 }
401 }
402 return false
403 }
404
405 // We handle ops by groups that need increasing details about the message.
406
407 switch sk.op {
408 case "ALL":
409 return true
410 case "NEW":
411 // We do not implement the RECENT flag, so messages cannot be NEW.
412 return false
413 case "OLD":
414 // We treat all messages as non-recent, so this means all messages.
415 return true
416 case "RECENT":
417 // We do not implement the RECENT flag. All messages are not recent.
418 return false
419 case "NOT":
420 return !s.match0(*sk.searchKey)
421 case "OR":
422 return s.match0(*sk.searchKey) || s.match0(*sk.searchKey2)
423 case "UID":
424 return sk.uidSet.containsUID(s.uid, c.uids, c.searchResult)
425 }
426
427 // Parsed part.
428 if !s.xensurePart() {
429 return false
430 }
431
432 // Parsed message, basic info.
433 switch sk.op {
434 case "ANSWERED":
435 return s.m.Answered
436 case "DELETED":
437 return s.m.Deleted
438 case "FLAGGED":
439 return s.m.Flagged
440 case "KEYWORD":
441 kw := strings.ToLower(sk.atom)
442 switch kw {
443 case "$forwarded":
444 return s.m.Forwarded
445 case "$junk":
446 return s.m.Junk
447 case "$notjunk":
448 return s.m.Notjunk
449 case "$phishing":
450 return s.m.Phishing
451 case "$mdnsent":
452 return s.m.MDNSent
453 default:
454 for _, k := range s.m.Keywords {
455 if k == kw {
456 return true
457 }
458 }
459 return false
460 }
461 case "SEEN":
462 return s.m.Seen
463 case "UNANSWERED":
464 return !s.m.Answered
465 case "UNDELETED":
466 return !s.m.Deleted
467 case "UNFLAGGED":
468 return !s.m.Flagged
469 case "UNKEYWORD":
470 kw := strings.ToLower(sk.atom)
471 switch kw {
472 case "$forwarded":
473 return !s.m.Forwarded
474 case "$junk":
475 return !s.m.Junk
476 case "$notjunk":
477 return !s.m.Notjunk
478 case "$phishing":
479 return !s.m.Phishing
480 case "$mdnsent":
481 return !s.m.MDNSent
482 default:
483 for _, k := range s.m.Keywords {
484 if k == kw {
485 return false
486 }
487 }
488 return true
489 }
490 case "UNSEEN":
491 return !s.m.Seen
492 case "DRAFT":
493 return s.m.Draft
494 case "UNDRAFT":
495 return !s.m.Draft
496 case "BEFORE", "ON", "SINCE":
497 skdt := sk.date.Format("2006-01-02")
498 rdt := s.m.Received.Format("2006-01-02")
499 switch sk.op {
500 case "BEFORE":
501 return rdt < skdt
502 case "ON":
503 return rdt == skdt
504 case "SINCE":
505 return rdt >= skdt
506 }
507 panic("missing case")
508 case "LARGER":
509 return s.m.Size > sk.number
510 case "SMALLER":
511 return s.m.Size < sk.number
512 case "MODSEQ":
513 // ../rfc/7162:1045
514 return s.m.ModSeq.Client() >= *sk.clientModseq
515 }
516
517 if s.p == nil {
518 c.log.Info("missing parsed message, not matching", mlog.Field("uid", s.uid))
519 return false
520 }
521
522 // Parsed message, more info.
523 switch sk.op {
524 case "BCC":
525 return filterHeader("Bcc", sk.astring)
526 case "BODY", "TEXT":
527 // We gathered word/notword searches from the top-level, but we can also get them
528 // nested.
529 // todo optimize: handle deeper nested word/not-word searches more efficiently.
530 headerToo := sk.op == "TEXT"
531 match, err := store.PrepareWordSearch([]string{sk.astring}, nil).MatchPart(s.c.log, s.p, headerToo)
532 xcheckf(err, "word search")
533 return match
534 case "CC":
535 return filterHeader("Cc", sk.astring)
536 case "FROM":
537 return filterHeader("From", sk.astring)
538 case "SUBJECT":
539 return filterHeader("Subject", sk.astring)
540 case "TO":
541 return filterHeader("To", sk.astring)
542 case "HEADER":
543 // ../rfc/9051:3895
544 lower := strings.ToLower(sk.astring)
545 h, err := s.p.Header()
546 if err != nil {
547 c.log.Errorx("parsing header for search", err, mlog.Field("uid", s.uid))
548 return false
549 }
550 k := textproto.CanonicalMIMEHeaderKey(sk.headerField)
551 for _, v := range h.Values(k) {
552 if lower == "" || strings.Contains(strings.ToLower(v), lower) {
553 return true
554 }
555 }
556 return false
557 case "SENTBEFORE", "SENTON", "SENTSINCE":
558 if s.p.Envelope == nil || s.p.Envelope.Date.IsZero() {
559 return false
560 }
561 dt := s.p.Envelope.Date.Format("2006-01-02")
562 skdt := sk.date.Format("2006-01-02")
563 switch sk.op {
564 case "SENTBEFORE":
565 return dt < skdt
566 case "SENTON":
567 return dt == skdt
568 case "SENTSINCE":
569 return dt > skdt
570 }
571 panic("missing case")
572 }
573 panic(serverError{fmt.Errorf("missing case for search key op %q", sk.op)})
574}
575