1//go:generate sh -c "curl https://publicsuffix.org/list/public_suffix_list.dat >public_suffix_list.txt"
3// Package publicsuffix implements a public suffix list to look up the
4// organizational domain for a given host name. Organizational domains can be
5// registered, one level below a top-level domain.
7// Example.com has a public suffix ".com", and example.co.uk has a public
8// suffix ".co.uk". The organizational domain of sub.example.com is
9// example.com, and the organizational domain of sub.example.co.uk is
24 "golang.org/x/net/idna"
26 "github.com/mjl-/mox/dns"
27 "github.com/mjl-/mox/mlog"
30// todo: automatically fetch new lists periodically? compare it with the old one. refuse it if it changed too much, especially if it contains far fewer entries than before.
32// labels is one level of a tree of rule labels: it maps a UTF-8 label to the labels of its subdomains.
33// The end of a rule is marked with an empty string as label.
34type labels map[string]labels
36// List is a public suffix list.
38 includes, excludes labels
// publicsuffixList is the builtin list consulted by the package-level Lookup.
41var publicsuffixList List
// publicsuffixData holds the contents of public_suffix_list.txt, embedded at
// build time through the go:embed directive below. It is the input handed to
// ParseList for the builtin list.
43//go:embed public_suffix_list.txt
44var publicsuffixData []byte
47 log := mlog.New("publicsuffix", nil)
48 l, err := ParseList(log.Logger, bytes.NewReader(publicsuffixData))
50 log.Fatalx("parsing public suffix list", err)
55// ParseList parses a public suffix list read from r.
56// Only rules in the "ICANN DOMAINS" section are used.
57func ParseList(elog *slog.Logger, r io.Reader) (List, error) {
58 log := mlog.New("publicsuffix", elog)
60 list := List{labels{}, labels{}}
61 br := bufio.NewReader(r)
66 line, err := br.ReadString('\n')
68 line = strings.TrimSpace(line)
69 if strings.HasPrefix(line, "// ===BEGIN ICANN DOMAINS===") {
72 } else if strings.HasPrefix(line, "// ===END ICANN DOMAINS===") {
75 } else if line == "" || strings.HasPrefix(line, "//") || !icannDomains {
81 if strings.HasPrefix(line, "!") {
84 t = strings.Split(line, ".")
86 log.Print("exclude rule with single label, skipping", slog.String("line", oline))
90 t = strings.Split(line, ".")
92 for i := len(t) - 1; i >= 0; i-- {
95 log.Print("empty label in rule, skipping", slog.String("line", oline))
98 if w != "" && w != "*" {
99 w, err = idna.Lookup.ToUnicode(w)
101 log.Printx("invalid label, skipping", err, slog.String("line", oline))
106 if _, dup := m[""]; i == 0 && dup {
107 log.Print("duplicate rule", slog.String("line", oline))
116 l[""] = nil // Mark end.
122 return List{}, fmt.Errorf("reading public suffix list: %w", err)
128// Lookup calls Lookup on the builtin public suffix list, from
129// https://publicsuffix.org/list/.
130func Lookup(ctx context.Context, elog *slog.Logger, domain dns.Domain) (orgDomain dns.Domain) {
131 return publicsuffixList.Lookup(ctx, elog, domain)
134// Lookup returns the organizational domain. If domain is an organizational
135// domain, or higher-level, the same domain is returned.
136func (l List) Lookup(ctx context.Context, elog *slog.Logger, domain dns.Domain) (orgDomain dns.Domain) {
137 log := mlog.New("publicsuffix", elog)
139 log.Debug("publicsuffix lookup result", slog.Any("reqdom", domain), slog.Any("orgdom", orgDomain))
142 t := strings.Split(domain.Name(), ".")
145 if nexcl, ok := match(l.excludes, t); ok {
147 } else if nincl, ok := match(l.includes, t); ok {
155 name := strings.Join(t[len(t)-n:], ".")
157 return dns.Domain{ASCII: name}
159 t = strings.Split(domain.ASCII, ".")
160 ascii := strings.Join(t[len(t)-n:], ".")
161 return dns.Domain{ASCII: ascii, Unicode: name}
164func isASCII(s string) bool {
165 for _, c := range s {
173func match(l labels, t []string) (int, bool) {
181 if m, mok := l[s]; mok {
182 if nn, sok := match(m, t); sok {
186 if m, mok := l["*"]; mok {
187 if nn, sok := match(m, t); sok && nn >= n {
192 return n, n > 0 || mok