1// Package webmail implements a webmail client, serving html/js and providing an API for message actions and SSE endpoint for receiving real-time updates.
4// todo: should we be serving the messages/parts on a separate (sub)domain for user-content? to limit damage if the csp rules aren't enough.
27 "golang.org/x/net/html"
29 "github.com/prometheus/client_golang/prometheus"
30 "github.com/prometheus/client_golang/prometheus/promauto"
32 "github.com/mjl-/bstore"
33 "github.com/mjl-/sherpa"
35 "github.com/mjl-/mox/message"
36 "github.com/mjl-/mox/metrics"
37 "github.com/mjl-/mox/mlog"
38 "github.com/mjl-/mox/mox-"
39 "github.com/mjl-/mox/moxio"
40 "github.com/mjl-/mox/store"
41 "github.com/mjl-/mox/webauth"
44var pkglog = mlog.New("webmail", nil)
48// We pass the request to the sherpa handler so the TLS info can be used for
49// the Received header in submitted messages. Most API calls need just the
51var requestInfoCtxKey ctxKey = "requestInfo"
53type requestInfo struct {
56 SessionToken store.SessionToken
57 Response http.ResponseWriter
58 Request *http.Request // For Proto and TLS connection state during message submit.
61//go:embed webmail.html
68var webmailmsgHTML []byte
71var webmailmsgJS []byte
74var webmailtextHTML []byte
77var webmailtextJS []byte
80 // Similar between ../webmail/webmail.go:/metricSubmission and ../smtpserver/server.go:/metricSubmission
81 metricSubmission = promauto.NewCounterVec(
82 prometheus.CounterOpts{
83 Name: "mox_webmail_submission_total",
84 Help: "Webmail message submission results, known values (those ending with error are server errors): ok, badfrom, messagelimiterror, recipientlimiterror, queueerror, storesenterror.",
90 metricServerErrors = promauto.NewCounterVec(
91 prometheus.CounterOpts{
92 Name: "mox_webmail_errors_total",
93 Help: "Webmail server errors, known values: dkimsign, submit.",
99 metricSSEConnections = promauto.NewGauge(
100 prometheus.GaugeOpts{
101 Name: "mox_webmail_sse_connections",
102 Help: "Number of active webmail SSE connections.",
107func xcheckf(ctx context.Context, err error, format string, args ...any) {
111 msg := fmt.Sprintf(format, args...)
112 errmsg := fmt.Sprintf("%s: %s", msg, err)
113 pkglog.WithContext(ctx).Errorx(msg, err)
114 code := "server:error"
115 if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
118 panic(&sherpa.Error{Code: code, Message: errmsg})
121func xcheckuserf(ctx context.Context, err error, format string, args ...any) {
125 msg := fmt.Sprintf(format, args...)
126 errmsg := fmt.Sprintf("%s: %s", msg, err)
127 pkglog.WithContext(ctx).Errorx(msg, err)
128 panic(&sherpa.Error{Code: "user:error", Message: errmsg})
131func xdbwrite(ctx context.Context, acc *store.Account, fn func(tx *bstore.Tx)) {
132 err := acc.DB.Write(ctx, func(tx *bstore.Tx) error {
136 xcheckf(ctx, err, "transaction")
139func xdbread(ctx context.Context, acc *store.Account, fn func(tx *bstore.Tx)) {
140 err := acc.DB.Read(ctx, func(tx *bstore.Tx) error {
144 xcheckf(ctx, err, "transaction")
147var webmailFile = &mox.WebappFile{
150 HTMLPath: filepath.FromSlash("webmail/webmail.html"),
151 JSPath: filepath.FromSlash("webmail/webmail.js"),
154// Serve content, either from a file, or return the fallback data. Caller
155// should already have set the content-type. We use this to return a file from
156// the local file system (during development), or embedded in the binary (when
158func serveContentFallback(log mlog.Log, w http.ResponseWriter, r *http.Request, path string, fallback []byte) {
159 f, err := os.Open(path)
164 http.ServeContent(w, r, "", st.ModTime(), f)
168 http.ServeContent(w, r, "", mox.FallbackMtime(log), bytes.NewReader(fallback))
171// Handler returns a handler for the webmail endpoints, customized for the max
172// message size coming from the listener and cookiePath.
173func Handler(maxMessageSize int64, cookiePath string, isForwarded bool) func(w http.ResponseWriter, r *http.Request) {
174 sh, err := makeSherpaHandler(maxMessageSize, cookiePath, isForwarded)
175 return func(w http.ResponseWriter, r *http.Request) {
177 http.Error(w, "500 - internal server error - cannot handle requests", http.StatusInternalServerError)
180 handle(sh, isForwarded, w, r)
184func handle(apiHandler http.Handler, isForwarded bool, w http.ResponseWriter, r *http.Request) {
186 log := pkglog.WithContext(ctx).With(slog.String("userauth", ""))
188 // Server-sent event connection, for all initial data (list of mailboxes), list of
189 // messages, and all events afterwards. Authenticated through a token in the query
190 // string, which it got from a Token API call.
191 if r.URL.Path == "/events" {
192 serveEvents(ctx, log, w, r)
201 err, ok := x.(*sherpa.Error)
203 log.WithContext(ctx).Error("handle panic", slog.Any("err", x))
205 metrics.PanicInc(metrics.Webmailhandle)
208 if strings.HasPrefix(err.Code, "user:") {
209 log.Debugx("webmail user error", err)
210 http.Error(w, "400 - bad request - "+err.Message, http.StatusBadRequest)
212 log.Errorx("webmail server error", err)
213 http.Error(w, "500 - internal server error - "+err.Message, http.StatusInternalServerError)
222 h.Set("X-Frame-Options", "deny")
223 h.Set("Referrer-Policy", "same-origin")
224 webmailFile.Serve(ctx, log, w, r)
226 http.Error(w, "405 - method not allowed - use get", http.StatusMethodNotAllowed)
230 case "/msg.js", "/text.js":
233 http.Error(w, "405 - method not allowed - use get", http.StatusMethodNotAllowed)
238 path := filepath.Join("webmail", r.URL.Path[1:])
239 var fallback = webmailmsgJS
240 if r.URL.Path == "/text.js" {
241 fallback = webmailtextJS
244 w.Header().Set("Content-Type", "application/javascript; charset=utf-8")
245 serveContentFallback(log, w, r, path, fallback)
249 isAPI := strings.HasPrefix(r.URL.Path, "/api/")
250 // Only allow POST for calls, they will not work cross-domain without CORS.
251 if isAPI && r.URL.Path != "/api/" && r.Method != "POST" {
252 http.Error(w, "405 - method not allowed - use post", http.StatusMethodNotAllowed)
256 var loginAddress, accName string
257 var sessionToken store.SessionToken
258 // All other URLs, except the login endpoint require some authentication.
259 if r.URL.Path != "/api/LoginPrep" && r.URL.Path != "/api/Login" {
261 accName, sessionToken, loginAddress, ok = webauth.Check(ctx, log, webauth.Accounts, "webmail", isForwarded, w, r, isAPI, isAPI, false)
263 // Response has been written already.
269 reqInfo := requestInfo{loginAddress, accName, sessionToken, w, r}
270 ctx = context.WithValue(ctx, requestInfoCtxKey, reqInfo)
271 apiHandler.ServeHTTP(w, r.WithContext(ctx))
275 // We are now expecting the following URLs:
276 // .../msg/<msgid>/{attachments.zip,parsedmessage.js,raw}
277 // .../msg/<msgid>/{,msg}{text,html,htmlexternal}
278 // .../msg/<msgid>/{view,viewtext,download}/<partid>
280 if !strings.HasPrefix(r.URL.Path, "/msg/") {
285 t := strings.Split(r.URL.Path[len("/msg/"):], "/")
291 id, err := strconv.ParseInt(t[0], 10, 64)
292 if err != nil || id == 0 {
297 // Many of the requests need either a message or a parsed part. Make it easy to
298 // fetch/prepare and cleanup. We only do all the work when the request seems legit
299 // (valid HTTP route and method).
300 xprepare := func() (acc *store.Account, m store.Message, msgr *store.MsgReader, p message.Part, cleanup func(), ok bool) {
301 if r.Method != "GET" {
302 http.Error(w, "405 - method not allowed - post required", http.StatusMethodNotAllowed)
312 log.Check(err, "closing message reader")
317 log.Check(err, "closing account")
324 acc, err = store.OpenAccount(log, accName)
325 xcheckf(ctx, err, "open account")
327 m = store.Message{ID: id}
328 err = acc.DB.Get(ctx, &m)
329 if err == bstore.ErrAbsent || err == nil && m.Expunged {
333 xcheckf(ctx, err, "get message")
335 msgr = acc.MessageReader(m)
337 p, err = m.LoadPart(msgr)
338 xcheckf(ctx, err, "load parsed message")
342 log.Check(err, "closing message reader")
344 log.Check(err, "closing account")
352 // We set a Content-Security-Policy header that is as strict as possible, depending
353 // on the type of message/part/html/js. We have to be careful because we are
354 // returning data that is coming in from external places. E.g. HTML could contain
355 // javascripts that we don't want to execute, especially not on our domain. We load
356 // resources in an iframe. The CSP policy starts out with default-src 'none' to
357 // disallow loading anything, then start allowing what is safe, such as inlined
358 // datauri images and inline styles. Data can only be loaded when the request is
359 // coming from the same origin (so other sites cannot include resources
360 // (messages/parts)).
362 // We want to load resources in sandbox-mode, causing the page to be loaded as from
363 // a different origin. If sameOrigin is set, we have a looser CSP policy:
364 // allow-same-origin is set so resources are loaded as coming from this same
365 // origin. This is needed for the msg* endpoints that render a message, where we
366 // load the message body in a separate iframe again (with stricter CSP again),
367 // which we need to access for its inner height. If allowSelfScript is also set
368 // (for "msgtext"), the CSP leaves out the sandbox entirely.
370 // If allowExternal is set, we allow loading image, media (audio/video), styles and
371 // fonts from external URLs as well as inline URI's. By default we don't allow any
372 // loading of content, except inlined images (we do that ourselves for images
373 // embedded in the email), and we allow inline styles (which are safely constrained
376 // If allowSelfScript is set, inline scripts and scripts from our origin are
377 // allowed. Used to display a message including header. The header is rendered with
378 // javascript, the content is rendered in a separate iframe with a CSP that doesn't
379 // have allowSelfScript.
380 headers := func(sameOrigin, allowExternal, allowSelfScript bool) {
381 // allow-popups is needed to make opening links in new tabs work.
382 sb := "sandbox allow-popups allow-popups-to-escape-sandbox; "
383 if sameOrigin && allowSelfScript {
384 // Sandbox with both allow-same-origin and allow-script would not provide security,
385 // and would give warning in console about that.
387 } else if sameOrigin {
388 sb = "sandbox allow-popups allow-popups-to-escape-sandbox allow-same-origin; "
392 script = "; script-src 'unsafe-inline' 'self'; frame-src 'self'; connect-src 'self'"
396 csp = sb + "frame-ancestors 'self'; default-src 'none'; img-src data: http: https: 'unsafe-inline'; style-src 'unsafe-inline' data: http: https:; font-src data: http: https: 'unsafe-inline'; media-src 'unsafe-inline' data: http: https:" + script
398 csp = sb + "frame-ancestors 'self'; default-src 'none'; img-src data:; style-src 'unsafe-inline'" + script
400 h.Set("Content-Security-Policy", csp)
401 h.Set("X-Frame-Options", "sameorigin") // Duplicate with CSP, but better too much than too little.
402 h.Set("X-Content-Type-Options", "nosniff")
403 h.Set("Referrer-Policy", "no-referrer")
407 case len(t) == 2 && t[1] == "attachments.zip":
408 acc, m, msgr, p, cleanup, ok := xprepare()
413 state := msgState{acc: acc, m: m, msgr: msgr, part: &p}
414 // note: state is cleared by cleanup
416 mi, err := messageItem(log, m, &state)
417 xcheckf(ctx, err, "parsing message")
419 headers(false, false, false)
420 h.Set("Content-Type", "application/zip")
421 h.Set("Cache-Control", "no-store, max-age=0")
422 var subjectSlug string
423 if p.Envelope != nil {
424 s := p.Envelope.Subject
425 s = strings.ToLower(s)
426 s = regexp.MustCompile("[^a-z0-9_.-]").ReplaceAllString(s, "-")
427 s = regexp.MustCompile("--*").ReplaceAllString(s, "-")
428 s = strings.TrimLeft(s, "-")
429 s = strings.TrimRight(s, "-")
435 filename := fmt.Sprintf("email-%d-attachments-%s%s.zip", m.ID, m.Received.Format("20060102-150405"), subjectSlug)
436 cd := mime.FormatMediaType("attachment", map[string]string{"filename": filename})
437 h.Set("Content-Disposition", cd)
439 zw := zip.NewWriter(w)
440 names := map[string]bool{}
441 for _, a := range mi.Attachments {
443 name := tryDecodeParam(log, ap.ContentTypeParams["name"])
445 // We don't check errors, this is all best-effort.
447 disposition := h.Get("Content-Disposition")
448 _, params, _ := mime.ParseMediaType(disposition)
449 name = tryDecodeParam(log, params["filename"])
452 name = filepath.Base(name)
454 mt := strings.ToLower(ap.MediaType + "/" + ap.MediaSubType)
455 if name == "" || names[name] {
456 ext := filepath.Ext(name)
458 // Handle just a few basic types.
459 extensions := map[string]string{
460 "text/plain": ".txt",
461 "text/html": ".html",
462 "image/jpeg": ".jpg",
465 "application/zip": ".zip",
473 if name != "" && strings.HasSuffix(name, ext) {
474 stem = strings.TrimSuffix(name, ext)
477 for _, index := range a.Path {
478 stem += fmt.Sprintf("-%d", index)
485 name = stem + fmt.Sprintf("-%d", seq) + ext
490 fh := zip.FileHeader{
492 Modified: m.Received,
494 nodeflate := map[string]bool{
495 "application/x-bzip2": true,
496 "application/zip": true,
497 "application/x-zip-compressed": true,
498 "application/gzip": true,
499 "application/x-gzip": true,
500 "application/vnd.rar": true,
501 "application/x-rar-compressed": true,
502 "application/x-7z-compressed": true,
504 // Sniff content-type as well for compressed data.
505 buf := make([]byte, 512)
506 n, _ := io.ReadFull(ap.Reader(), buf)
509 sniffmt = strings.ToLower(http.DetectContentType(buf[:n]))
511 deflate := ap.MediaType != "VIDEO" && ap.MediaType != "AUDIO" && (ap.MediaType != "IMAGE" || ap.MediaSubType == "BMP") && !nodeflate[mt] && !nodeflate[sniffmt]
513 fh.Method = zip.Deflate
515 // We cannot return errors anymore: we have already sent an application/zip header.
516 if zf, err := zw.CreateHeader(&fh); err != nil {
517 log.Check(err, "adding to zip file")
519 } else if _, err := io.Copy(zf, ap.Reader()); err != nil {
520 log.Check(err, "writing to zip file")
525 log.Check(err, "final write to zip file")
527 // Raw display of a message, as text/plain.
528 case len(t) == 2 && t[1] == "raw":
529 _, _, msgr, p, cleanup, ok := xprepare()
535 // We intentionally use text/plain. We certainly don't want to return a format that
536 // browsers or users would think of executing. We do set the charset if available
537 // on the outer part. If present, we assume it may be relevant for other parts. If
538 // not, there is not much we could do better...
539 headers(false, false, false)
541 params := map[string]string{}
542 if charset := p.ContentTypeParams["charset"]; charset != "" {
543 params["charset"] = charset
545 h.Set("Content-Type", mime.FormatMediaType(ct, params))
546 h.Set("Cache-Control", "no-store, max-age=0")
548 _, err := io.Copy(w, &moxio.AtReader{R: msgr})
549 log.Check(err, "writing raw")
551 case len(t) == 2 && (t[1] == "msgtext" || t[1] == "msghtml" || t[1] == "msghtmlexternal"):
552 // msg.html has a javascript tag with message data, and javascript to render the
553 // message header like the regular webmail.html and to load the message body in a
554 // separate iframe with a separate request with stronger CSP.
555 acc, m, msgr, p, cleanup, ok := xprepare()
561 state := msgState{acc: acc, m: m, msgr: msgr, part: &p}
562 // note: state is cleared by cleanup
564 pm, err := parsedMessage(log, m, &state, true, true)
565 xcheckf(ctx, err, "getting parsed message")
566 if t[1] == "msgtext" && len(pm.Texts) == 0 || t[1] != "msgtext" && !pm.HasHTML {
567 http.Error(w, "400 - bad request - no such part", http.StatusBadRequest)
572 loadExternal := t[1] == "msghtmlexternal"
573 allowSelfScript := true
574 headers(sameorigin, loadExternal, allowSelfScript)
575 h.Set("Content-Type", "text/html; charset=utf-8")
576 h.Set("Cache-Control", "no-store, max-age=0")
578 path := filepath.FromSlash("webmail/msg.html")
579 fallback := webmailmsgHTML
580 serveContentFallback(log, w, r, path, fallback)
582 case len(t) == 2 && t[1] == "parsedmessage.js":
583 // Used by msg.html, for the msg* endpoints, for the data needed to show all data
584 // except the message body.
585 // This is js with data inside instead so we can load it synchronously, which we do
586 // to get a "loaded" event after the page was actually loaded.
588 acc, m, msgr, p, cleanup, ok := xprepare()
593 state := msgState{acc: acc, m: m, msgr: msgr, part: &p}
594 // note: state is cleared by cleanup
596 pm, err := parsedMessage(log, m, &state, true, true)
597 xcheckf(ctx, err, "parsing parsedmessage")
598 pmjson, err := json.Marshal(pm)
599 xcheckf(ctx, err, "marshal parsedmessage")
603 mi := MessageItem{m, pm.envelope, pm.attachments, pm.isSigned, pm.isEncrypted, pm.firstLine, false}
604 mijson, err := json.Marshal(mi)
605 xcheckf(ctx, err, "marshal messageitem")
607 headers(false, false, false)
608 h.Set("Content-Type", "application/javascript; charset=utf-8")
609 h.Set("Cache-Control", "no-store, max-age=0")
611 _, err = fmt.Fprintf(w, "window.messageItem = %s;\nwindow.parsedMessage = %s;\n", mijson, pmjson)
612 log.Check(err, "writing parsedmessage.js")
614 case len(t) == 2 && t[1] == "text":
615 // Returns text.html which loads the message data with a javascript tag and
616 // renders just the text content with the same code as webmail.html. Used by the
617 // iframe in the msgtext endpoint. Not used by the regular webmail viewer, it
618 // renders the text itself, with the same shared js code.
619 acc, m, msgr, p, cleanup, ok := xprepare()
625 state := msgState{acc: acc, m: m, msgr: msgr, part: &p}
626 // note: state is cleared by cleanup
628 pm, err := parsedMessage(log, m, &state, true, true)
629 xcheckf(ctx, err, "parsing parsedmessage")
631 if len(pm.Texts) == 0 {
632 http.Error(w, "400 - bad request - no text part in message", http.StatusBadRequest)
636 // Needed for inner document height for outer iframe height in separate message view.
638 allowSelfScript := true
639 headers(sameorigin, false, allowSelfScript)
640 h.Set("Content-Type", "text/html; charset=utf-8")
641 h.Set("Cache-Control", "no-store, max-age=0")
643 // We typically return the embedded file, but during development it's handy to load
645 path := filepath.FromSlash("webmail/text.html")
646 fallback := webmailtextHTML
647 serveContentFallback(log, w, r, path, fallback)
649 case len(t) == 2 && (t[1] == "html" || t[1] == "htmlexternal"):
650 // Returns the first HTML part, with "cid:" URIs replaced with an inlined datauri
651 // if the referenced Content-ID attachment can be found.
652 _, _, _, p, cleanup, ok := xprepare()
658 setHeaders := func() {
659 // Needed for inner document height for outer iframe height in separate message
660 // view. We only need that when displaying as a separate message on the msghtml*
661 // endpoints. When displaying in the regular webmail, we don't need to know the
662 // inner height so we load it as different origin, which should be safer.
663 sameorigin := r.URL.Query().Get("sameorigin") == "true"
664 allowExternal := strings.HasSuffix(t[1], "external")
665 headers(sameorigin, allowExternal, false)
667 h.Set("Content-Type", "text/html; charset=utf-8")
668 h.Set("Cache-Control", "no-store, max-age=0")
671 // todo: skip certain html parts? e.g. with content-disposition: attachment?
673 var usePart func(p *message.Part, parents []*message.Part)
674 usePart = func(p *message.Part, parents []*message.Part) {
678 mt := p.MediaType + "/" + p.MediaSubType
682 err := inlineSanitizeHTML(log, setHeaders, w, p, parents)
684 http.Error(w, "400 - bad request - "+err.Error(), http.StatusBadRequest)
688 parents = append(parents, p)
689 for _, sp := range p.Parts {
690 usePart(&sp, parents)
696 http.Error(w, "400 - bad request - no html part in message", http.StatusBadRequest)
699 case len(t) == 3 && (t[1] == "view" || t[1] == "viewtext" || t[1] == "download"):
700 // View any part, as referenced in the last element path. "0" is the whole message,
701 // 0.0 is the first subpart, etc. "view" returns it with the content-type from the
702 // message (could be dangerous, but we set strict CSP headers), "viewtext" returns
703 // data with a text/plain content-type so the browser will attempt to display it,
704 // and "download" adds a content-disposition header causing the browser to
705 // download the file.
706 _, _, _, p, cleanup, ok := xprepare()
712 paths := strings.Split(t[2], ".")
713 if len(paths) == 0 || paths[0] != "0" {
718 for _, e := range paths[1:] {
719 index, err := strconv.ParseInt(e, 10, 32)
720 if err != nil || index < 0 || int(index) >= len(ap.Parts) {
724 ap = ap.Parts[int(index)]
727 headers(false, false, false)
729 if t[1] == "viewtext" {
732 ct = strings.ToLower(ap.MediaType + "/" + ap.MediaSubType)
734 h.Set("Content-Type", ct)
735 h.Set("Cache-Control", "no-store, max-age=0")
736 if t[1] == "download" {
737 name := tryDecodeParam(log, ap.ContentTypeParams["name"])
739 // We don't check errors, this is all best-effort.
741 disposition := h.Get("Content-Disposition")
742 _, params, _ := mime.ParseMediaType(disposition)
743 name = tryDecodeParam(log, params["filename"])
746 name = "attachment.bin"
748 cd := mime.FormatMediaType("attachment", map[string]string{"filename": name})
749 h.Set("Content-Disposition", cd)
752 _, err := io.Copy(w, ap.Reader())
753 if err != nil && !moxio.IsClosed(err) {
754 log.Errorx("copying attachment", err)
761// inlineSanitizeHTML writes the part as HTML, with "cid:" URIs for html "src"
762// attributes inlined and with potentially dangerous tags removed (javascript). The
763// sanitizing is just a first layer of defense, CSP headers block execution of
764// scripts. If the HTML becomes too large, an error is returned. Before writing
765// HTML, setHeaders is called to write the required headers for content-type and
766// CSP. On error, setHeaders is not called, no output is written and the caller
767// should write an error response.
768func inlineSanitizeHTML(log mlog.Log, setHeaders func(), w io.Writer, p *message.Part, parents []*message.Part) error {
769 // Prepare cids if there is a chance we will use them.
770 cids := map[string]*message.Part{}
771 for _, parent := range parents {
772 if parent.MediaType+"/"+parent.MediaSubType == "MULTIPART/RELATED" && p.DecodedSize < 2*1024*1024 {
773 for i, rp := range parent.Parts {
774 if rp.ContentID != "" {
775 cids[strings.ToLower(rp.ContentID)] = &parent.Parts[i]
781 node, err := html.Parse(p.ReaderUTF8OrBinary())
783 return fmt.Errorf("parsing html: %v", err)
786 // We track size, if it becomes too much, we abort and still copy as regular html.
788 if err := inlineNode(node, cids, &totalSize); err != nil {
789 return fmt.Errorf("inline cid uris in html nodes: %w", err)
793 err = html.Render(w, node)
794 log.Check(err, "writing html")
798// We inline cid: URIs into data: URIs. If a cid is missing in the
799// multipart/related, we ignore the error and continue with other HTML nodes. It
800// will probably just result in a "broken image". We limit the max size we
801// generate. We only replace "src" attributes that start with "cid:". A cid URI
802// could theoretically occur in many more places, like link href, and css url().
803// That's probably not common though. Let's wait for someone to need it.
804func inlineNode(node *html.Node, cids map[string]*message.Part, totalSize *int64) error {
805 for i, a := range node.Attr {
806 if a.Key != "src" || !caselessPrefix(a.Val, "cid:") || a.Namespace != "" {
810 ap := cids["<"+strings.ToLower(cid)+">"]
812 // Missing cid, can happen with email, no need to stop returning data.
815 *totalSize += ap.DecodedSize
816 if *totalSize >= 10*1024*1024 {
817 return fmt.Errorf("html too large")
819 var sb strings.Builder
820 if _, err := fmt.Fprintf(&sb, "data:%s;base64,", strings.ToLower(ap.MediaType+"/"+ap.MediaSubType)); err != nil {
821 return fmt.Errorf("writing datauri: %v", err)
823 w := base64.NewEncoder(base64.StdEncoding, &sb)
824 if _, err := io.Copy(w, ap.Reader()); err != nil {
825 return fmt.Errorf("writing base64 datauri: %v", err)
827 node.Attr[i].Val = sb.String()
829 for node = node.FirstChild; node != nil; node = node.NextSibling {
830 if err := inlineNode(node, cids, totalSize); err != nil {
// caselessPrefix reports whether k starts with pre, ignoring case. The first
// len(pre) bytes of k are compared with pre using strings.EqualFold; a k shorter
// than pre never matches.
func caselessPrefix(k, pre string) bool {
	return len(k) >= len(pre) && strings.EqualFold(k[:len(pre)], pre)
}
841var targetable = map[string]bool{
848// sanitizeNode removes script elements, on* attributes, javascript: href
849// attributes, adds target="_blank" to all links and to a base tag.
850func sanitizeNode(node *html.Node) {
852 var haveTarget, haveRel bool
853 for i < len(node.Attr) {
855 // Remove dangerous attributes.
856 if strings.HasPrefix(a.Key, "on") || a.Key == "href" && caselessPrefix(a.Val, "javascript:") || a.Key == "src" && caselessPrefix(a.Val, "data:text/html") {
857 copy(node.Attr[i:], node.Attr[i+1:])
858 node.Attr = node.Attr[:len(node.Attr)-1]
861 if a.Key == "target" {
862 node.Attr[i].Val = "_blank"
865 if a.Key == "rel" && targetable[node.Data] {
866 node.Attr[i].Val = "noopener noreferrer"
871 // Ensure target attribute is set for elements that can have it.
872 if !haveTarget && node.Type == html.ElementNode && targetable[node.Data] {
873 node.Attr = append(node.Attr, html.Attribute{Key: "target", Val: "_blank"})
876 if haveTarget && !haveRel {
877 node.Attr = append(node.Attr, html.Attribute{Key: "rel", Val: "noopener noreferrer"})
881 node = node.FirstChild
884 // Set next now, we may remove cur, which clears its NextSibling.
886 node = node.NextSibling
888 // Remove script elements.
889 if cur.Type == html.ElementNode && cur.Data == "script" {
890 parent.RemoveChild(cur)
895 if parent.Type == html.ElementNode && parent.Data == "head" && !haveBase {
896 n := html.Node{Type: html.ElementNode, Data: "base", Attr: []html.Attribute{{Key: "target", Val: "_blank"}, {Key: "rel", Val: "noopener noreferrer"}}}
897 parent.AppendChild(&n)