1package smtpserver

3import (

4 "errors"

5 "fmt"

6 "time"

8 "github.com/mjl-/bstore"

10 "github.com/mjl-/mox/mlog"

11 "github.com/mjl-/mox/smtp"

12 "github.com/mjl-/mox/store"

13)

// reputationMethod names the kind of historic data a reputation decision was
// based on. It is returned by reputation alongside the decision itself.
type reputationMethod string

const (
	// Decision based on the full "message from" address: messages received from
	// it, or messages sent to it.
	methodMsgfromFull = reputationMethod("msgfromfull")
	methodMsgtoFull   = reputationMethod("msgtofull")

	// Decision based on the domain, or organizational domain, of the "message
	// from" address: messages received from it, or messages sent to it.
	methodMsgfromDomain    = reputationMethod("msgfromdomain")
	methodMsgfromOrgDomain = reputationMethod("msgfromorgdomain")
	methodMsgtoDomain      = reputationMethod("msgtodomain")
	methodMsgtoOrgDomain   = reputationMethod("msgtoorgdomain")

	// Decision based on DKIM domains and SPF identities.
	methodDKIMSPF = reputationMethod("dkimspf")

	// Decision based on the masked remote IP, using RemoteIPMasked1,
	// RemoteIPMasked2 and RemoteIPMasked3 respectively.
	methodIP1 = reputationMethod("ip1")
	methodIP2 = reputationMethod("ip2")
	methodIP3 = reputationMethod("ip3")

	// No historic messages matched on any property.
	methodNone = reputationMethod("none")
)

31// Reputation returns whether message m is likely junk.

32//

33// This function is called after checking for a manually configured spf mailfrom

34// allow (e.g. for mailing lists), and after checking for a dmarc reject policy.

35//

36// The decision is made based on historic messages delivered to the same

37// destination mailbox, MailboxOrigID. Because each mailbox may have a different

38// accept policy. We only use messages that have been marked as either junk or

39// non-junk. We help users by automatically marking them as non-junk when moving to

40// certain folders in the default config (e.g. the archive folder). We expect users

41// to mark junk messages as such when they read it. And to keep it in their inbox,

42// regular trash or archive if it is not.

43//

44// The basic idea is to keep accepting messages that were accepted in the past, and

45// keep rejecting those that were rejected. This is relatively easy to check if

46// mail passes SPF and/or DKIM with Message-From alignment. Regular email from

47// known people will be let in. But spammers are trickier. They will use new IPs,

48// (sub)domains, no or newly created SPF and/or DKIM identifiers, new localparts,

49// etc. This function likely ends up returning "inconclusive" for such emails. The

50// junkfilter will have to take care of a final decision.

51//

52// In case of doubt, it doesn't hurt much to accept another mail that a user has

53// communicated successfully with in the past. If the most recent message is marked

54// as junk that could have happened accidentally. If another message is let in, and

55// it is again junk, future messages will be rejected.

56//

57// Actual spammers will probably try to use identifiers, i.e. (sub)domain, dkim/spf

58// identifiers and ip addresses for which we have no history. We may only have

59// ip-based reputation, perhaps only an ip range, perhaps nothing.

60//

61// Some profiles of first-time senders:

62//

63// - Individuals. They can typically get past the junkfilter if needed.

64// - Transactional emails. They should get past the junkfilter. If they use one of

65// the larger email service providers, their reputation could help. If the

66// junkfilter rejects the message, users can recover the message from the Rejects

67// mailbox. The first message is typically initiated by a user, e.g. by registering.

68// - Desired commercial email will have to get past the junkfilter based on its

69// content. There will typically be earlier communication with the (organizational)

70// domain that would let the message through.

71// - Mailing list. May get past the junkfilter. If delivery is to a separate

72// mailbox, the junkfilter will let it in because of little history. Long enough to

73// build reputation based on DKIM/SPF signals. Users are best off to

74// configure accept rules for messages from mailing lists.

75//

76// The decision-making process looks at historic messages. The following properties

77// are checked until matching messages are found. If they are found, a decision is

78// returned, which may be inconclusive. The next property on the list is only

79// checked if a step did not match any messages.

80//

81// - Messages matching full "message from" address, either with strict/relaxed

82// dkim/spf-verification, or without.

83// - Messages the user sent to the "message from" address.

84// - Messages matching only the domain of the "message from" address (different

85// localpart), again with verification or without.

86// - Messages sent to an address in the domain of the "message from" address.

87// - The previous two checks again, but now checking against the organizational

88// domain instead of the exact domain.

89// - Matching DKIM domains and a matching SPF mailfrom, or mailfrom domain, or ehlo

90// domain.

91// - "Exact" IP, or nearby IPs.

92//

93// References:

94// ../rfc/5863

95// ../rfc/7960

96// ../rfc/6376:1915

97// ../rfc/6376:3716

98// ../rfc/7208:2167

99func reputation(tx *bstore.Tx, log *mlog.Log, m *store.Message) (rjunk *bool, rconclusive bool, rmethod reputationMethod, rerr error) {

100 boolptr := func(v bool) *bool {

101 return &v

102 }

103 xfalse := boolptr(false)

104 xtrue := boolptr(true)

105

106 type queryError string

107

108 defer func() {

109 x := recover()

110 if x == nil {

111 return

112 }

113 if xerr, ok := x.(queryError); ok {

114 rerr = errors.New(string(xerr))

115 return

116 }

117 panic(x)

118 }()

119

120 now := time.Now()

121

122 // messageQuery returns a base query for historic seen messages to the same

123 // mailbox, at most maxAge old, and at most maxCount messages.

124 messageQuery := func(fm *store.Message, maxAge time.Duration, maxCount int) *bstore.Query[store.Message] {

125 q := bstore.QueryTx[store.Message](tx)

126 q.FilterEqual("MailboxOrigID", m.MailboxID)

127 q.FilterEqual("Expunged", false)

128 q.FilterFn(func(m store.Message) bool {

129 return m.Junk || m.Notjunk

130 })

131 if fm != nil {

132 q.FilterNonzero(*fm)

133 }

134 q.FilterGreaterEqual("Received", now.Add(-maxAge))

135 q.Limit(maxCount)

136 q.SortDesc("Received")

137 return q

138 }

139

140 // Execute the query, returning messages or returning error through panic.

141 xmessageList := func(q *bstore.Query[store.Message], descr string) []store.Message {

142 t0 := time.Now()

143 l, err := q.List()

144 log.Debugx("querying messages for reputation", err, mlog.Field("msgs", len(l)), mlog.Field("descr", descr), mlog.Field("queryduration", time.Since(t0)))

145 if err != nil {

146 panic(queryError(fmt.Sprintf("listing messages: %v", err)))

147 }

148 return l

149 }

150

151 xrecipientExists := func(q *bstore.Query[store.Recipient]) bool {

152 exists, err := q.Exists()

153 if err != nil {

154 panic(queryError(fmt.Sprintf("checking for recipient: %v", err)))

155 }

156 return exists

157 }

158

159 const year = 365 * 24 * time.Hour

160

161 // Look for historic messages with same "message from" address. We'll

162 // treat any validation (strict/dmarc/relaxed) the same, but "none"

163 // separately.

164 //

165 // We only need 1 message, and sometimes look at a second message. If

166 // the last message or the message before was an accept, we accept. If

167 // the single last or last two were a reject, we reject.

168 //

169 // If there was no validation, any signal is inconclusive.

170 if m.MsgFromDomain != "" {

171 q := messageQuery(&store.Message{MsgFromLocalpart: m.MsgFromLocalpart, MsgFromDomain: m.MsgFromDomain}, 3*year, 2)

172 q.FilterEqual("MsgFromValidated", m.MsgFromValidated)

173 msgs := xmessageList(q, "mgsfromfull")

174 if len(msgs) > 0 {

175 // todo: we may want to look at dkim/spf in this case.

176 spam := msgs[0].Junk && (len(msgs) == 1 || msgs[1].Junk)

177 conclusive := m.MsgFromValidated

178 return &spam, conclusive, methodMsgfromFull, nil

179 }

180 if !m.MsgFromValidated {

181 // Look for historic messages that were validated. If present, this is likely spam.

182 // Only return as conclusively spam if history also says this From-address sent

183 // spam.

184 q := messageQuery(&store.Message{MsgFromLocalpart: m.MsgFromLocalpart, MsgFromDomain: m.MsgFromDomain, MsgFromValidated: true}, 3*year, 2)

185 msgs = xmessageList(q, "msgfromfull-validated")

186 if len(msgs) > 0 {

187 spam := msgs[0].Junk && (len(msgs) == 1 || msgs[1].Junk)

188 return xtrue, spam, methodMsgfromFull, nil

189 }

190 }

191

192 // Look if we ever sent to this address. If so, we accept,

193 qr := bstore.QueryTx[store.Recipient](tx)

194 qr.FilterEqual("Localpart", m.MsgFromLocalpart)

195 qr.FilterEqual("Domain", m.MsgFromDomain)

196 qr.FilterGreaterEqual("Sent", now.Add(-3*year))

197 if xrecipientExists(qr) {

198 return xfalse, true, methodMsgtoFull, nil

199 }

200

201 // Look for domain match, then for organizational domain match.

202 for _, orgdomain := range []bool{false, true} {

203 qm := store.Message{}

204 var method reputationMethod

205 var descr string

206 if orgdomain {

207 qm.MsgFromOrgDomain = m.MsgFromOrgDomain

208 method = methodMsgfromOrgDomain

209 descr = "msgfromorgdomain"

210 } else {

211 qm.MsgFromDomain = m.MsgFromDomain

212 method = methodMsgfromDomain

213 descr = "msgfromdomain"

214 }

215

216 q := messageQuery(&qm, 2*year, 20)

217 q.FilterEqual("MsgFromValidated", m.MsgFromValidated)

218 msgs := xmessageList(q, descr)

219 if len(msgs) > 0 {

220 nonjunk := 0

221 for _, m := range msgs {

222 if !m.Junk {

223 nonjunk++

224 }

225 }

226 if 100*nonjunk/len(msgs) > 80 {

227 return xfalse, true, method, nil

228 }

229 if nonjunk == 0 {

230 // Only conclusive with at least 3 different localparts.

231 localparts := map[smtp.Localpart]struct{}{}

232 for _, m := range msgs {

233 localparts[m.MsgFromLocalpart] = struct{}{}

234 if len(localparts) == 3 {

235 return xtrue, true, method, nil

236 }

237 }

238 return xtrue, false, method, nil

239 }

240 // Mixed signals from domain. We don't want to block a new sender.

241 return nil, false, method, nil

242 }

243 if !m.MsgFromValidated {

244 // Look for historic messages that were validated. If present, this is likely spam.

245 // Only return as conclusively spam if history also says this From-address sent

246 // spam.

247 q := messageQuery(&qm, 2*year, 2)

248 q.FilterEqual("MsgFromValidated", true)

249 msgs = xmessageList(q, descr+"-validated")

250 if len(msgs) > 0 {

251 spam := msgs[0].Junk && (len(msgs) == 1 || msgs[1].Junk)

252 return xtrue, spam, method, nil

253 }

254 }

255

256 // Look if we ever sent to this address. If so, we accept,

257 qr := bstore.QueryTx[store.Recipient](tx)

258 if orgdomain {

259 qr.FilterEqual("OrgDomain", m.MsgFromOrgDomain)

260 method = methodMsgtoOrgDomain

261 } else {

262 qr.FilterEqual("Domain", m.MsgFromDomain)

263 method = methodMsgtoDomain

264 }

265 qr.FilterGreaterEqual("Sent", now.Add(-2*year))

266 if xrecipientExists(qr) {

267 return xfalse, true, method, nil

268 }

269 }

270 }

271

272 // DKIM and SPF.

273 // We only use identities that passed validation. Failed identities are ignored. ../rfc/6376:2447

274 // todo future: we could do something with the DKIM identity (i=) field if it is more specific than just the domain (d=).

275 dkimspfsignals := []float64{}

276 dkimspfmsgs := 0

277 for _, dom := range m.DKIMDomains {

278 q := messageQuery(nil, year/2, 50)

279 q.FilterIn("DKIMDomains", dom)

280 msgs := xmessageList(q, "dkimdomain")

281 if len(msgs) > 0 {

282 nspam := 0

283 for _, m := range msgs {

284 if m.Junk {

285 nspam++

286 }

287 }

288 pspam := float64(nspam) / float64(len(msgs))

289 dkimspfsignals = append(dkimspfsignals, pspam)

290 dkimspfmsgs = len(msgs)

291 }

292 }

293 if m.MailFromValidated || m.EHLOValidated {

294 var msgs []store.Message

295 if m.MailFromValidated && m.MailFromDomain != "" {

296 q := messageQuery(&store.Message{MailFromLocalpart: m.MailFromLocalpart, MailFromDomain: m.MailFromDomain}, year/2, 50)

297 msgs = xmessageList(q, "mailfrom")

298 if len(msgs) == 0 {

299 q := messageQuery(&store.Message{MailFromDomain: m.MailFromDomain}, year/2, 50)

300 msgs = xmessageList(q, "mailfromdomain")

301 }

302 }

303 if len(msgs) == 0 && m.EHLOValidated && m.EHLODomain != "" {

304 q := messageQuery(&store.Message{EHLODomain: m.EHLODomain}, year/2, 50)

305 msgs = xmessageList(q, "ehlodomain")

306 }

307 if len(msgs) > 0 {

308 nspam := 0

309 for _, m := range msgs {

310 if m.Junk {

311 nspam++

312 }

313 }

314 pspam := float64(nspam) / float64(len(msgs))

315 dkimspfsignals = append(dkimspfsignals, pspam)

316 if len(msgs) > dkimspfmsgs {

317 dkimspfmsgs = len(msgs)

318 }

319 }

320 }

321 if len(dkimspfsignals) > 0 {

322 var nham, nspam int

323 for _, p := range dkimspfsignals {

324 if p < .1 {

325 nham++

326 } else if p > .9 {

327 nspam++

328 }

329 }

330 if nham > 0 && nspam == 0 {

331 return xfalse, true, methodDKIMSPF, nil

332 }

333 if nspam > 0 && nham == 0 {

334 return xtrue, dkimspfmsgs > 1, methodDKIMSPF, nil

335 }

336 return nil, false, methodDKIMSPF, nil

337 }

338

339 // IP-based. A wider mask needs more messages to be conclusive.

340 // We require the resulting signal to be strong, i.e. likely ham or likely spam.

341 var msgs []store.Message

342 var need int

343 var method reputationMethod

344 if m.RemoteIPMasked1 != "" {

345 q := messageQuery(&store.Message{RemoteIPMasked1: m.RemoteIPMasked1}, year/4, 50)

346 msgs = xmessageList(q, "ip1")

347 need = 2

348 method = methodIP1

349 }

350 if len(msgs) == 0 && m.RemoteIPMasked2 != "" {

351 q := messageQuery(&store.Message{RemoteIPMasked2: m.RemoteIPMasked2}, year/4, 50)

352 msgs = xmessageList(q, "ip2")

353 need = 5

354 method = methodIP2

355 }

356 if len(msgs) == 0 && m.RemoteIPMasked3 != "" {

357 q := messageQuery(&store.Message{RemoteIPMasked3: m.RemoteIPMasked3}, year/4, 50)

358 msgs = xmessageList(q, "ip3")

359 need = 10

360 method = methodIP3

361 }

362 if len(msgs) > 0 {

363 nspam := 0

364 for _, m := range msgs {

365 if m.Junk {

366 nspam++

367 }

368 }

369 pspam := float64(nspam) / float64(len(msgs))

370 var spam *bool

371 if pspam < .25 {

372 spam = xfalse

373 } else if pspam > .75 {

374 spam = xtrue

375 }

376 conclusive := len(msgs) >= need && (pspam <= 0.1 || pspam >= 0.9)

377 return spam, conclusive, method, nil

378 }

379

380 return nil, false, methodNone, nil

381}

382