1//go:generate sh -c "curl https://publicsuffix.org/list/public_suffix_list.dat >public_suffix_list.txt"
2
// Package publicsuffix implements a public suffix list to look up the
// organizational domain for a given host name. Organizational domains can be
// registered, one level below a public suffix.
//
// Example.com has a public suffix ".com", and example.co.uk has a public
// suffix ".co.uk". The organizational domain of sub.example.com is
// example.com, and the organizational domain of sub.example.co.uk is
// example.co.uk.
11package publicsuffix
12
13import (
14 "bufio"
15 "bytes"
16 "context"
17 "fmt"
18 "io"
19 "log/slog"
20 "strings"
21
22 _ "embed"
23
24 "golang.org/x/net/idna"
25
26 "github.com/mjl-/mox/dns"
27 "github.com/mjl-/mox/mlog"
28)
29
30// todo: automatically fetch new lists periodically? compare it with the old one. refuse it if it changed too much, especially if it contains far fewer entries than before.
31
// labels is a tree of domain name labels, keyed by utf8 label. Each value
// holds the labels for the next level of subdomains. The key "*" is a wildcard
// that matches any single label. The end of a rule is marked with an empty
// string as label (with a nil value).
type labels map[string]labels
35
// List is a public suffix list.
type List struct {
	// Label trees, rooted at the rightmost label of a rule. Regular rules are
	// stored in includes, exception rules (starting with "!") in excludes.
	includes, excludes labels
}
40
// publicsuffixList is the builtin list used by the package-level Lookup. It is
// set by init, from publicsuffixData.
var publicsuffixList List

// publicsuffixData holds the contents of the embedded public_suffix_list.txt.
//
//go:embed public_suffix_list.txt
var publicsuffixData []byte
45
46func init() {
47 log := mlog.New("publicsuffix", nil)
48 l, err := ParseList(log.Logger, bytes.NewReader(publicsuffixData))
49 if err != nil {
50 log.Fatalx("parsing public suffix list", err)
51 }
52 publicsuffixList = l
53}
54
55// ParseList parses a public suffix list.
56// Only the "ICANN DOMAINS" are used.
57func ParseList(elog *slog.Logger, r io.Reader) (List, error) {
58 log := mlog.New("publicsuffix", elog)
59
60 list := List{labels{}, labels{}}
61 br := bufio.NewReader(r)
62
63 // Only use ICANN domains. ../rfc/7489-eid6729
64 var icannDomains bool
65 for {
66 line, err := br.ReadString('\n')
67 if line != "" {
68 line = strings.TrimSpace(line)
69 if strings.HasPrefix(line, "// ===BEGIN ICANN DOMAINS===") {
70 icannDomains = true
71 continue
72 } else if strings.HasPrefix(line, "// ===END ICANN DOMAINS===") {
73 icannDomains = false
74 continue
75 } else if line == "" || strings.HasPrefix(line, "//") || !icannDomains {
76 continue
77 }
78 l := list.includes
79 var t []string
80 oline := line
81 if strings.HasPrefix(line, "!") {
82 line = line[1:]
83 l = list.excludes
84 t = strings.Split(line, ".")
85 if len(t) == 1 {
86 log.Print("exclude rule with single label, skipping", slog.String("line", oline))
87 continue
88 }
89 } else {
90 t = strings.Split(line, ".")
91 }
92 for i := len(t) - 1; i >= 0; i-- {
93 w := t[i]
94 if w == "" {
95 log.Print("empty label in rule, skipping", slog.String("line", oline))
96 break
97 }
98 if w != "" && w != "*" {
99 w, err = idna.Lookup.ToUnicode(w)
100 if err != nil {
101 log.Printx("invalid label, skipping", err, slog.String("line", oline))
102 }
103 }
104 m, ok := l[w]
105 if ok {
106 if _, dup := m[""]; i == 0 && dup {
107 log.Print("duplicate rule", slog.String("line", oline))
108 }
109 l = m
110 } else {
111 m = labels{}
112 l[w] = m
113 l = m
114 }
115 }
116 l[""] = nil // Mark end.
117 }
118 if err == io.EOF {
119 break
120 }
121 if err != nil {
122 return List{}, fmt.Errorf("reading public suffix list: %w", err)
123 }
124 }
125 return list, nil
126}
127
// Lookup calls Lookup on the builtin public suffix list, from
// https://publicsuffix.org/list/, and returns the organizational domain it
// finds for domain.
func Lookup(ctx context.Context, elog *slog.Logger, domain dns.Domain) (orgDomain dns.Domain) {
	return publicsuffixList.Lookup(ctx, elog, domain)
}
133
134// Lookup returns the organizational domain. If domain is an organizational
135// domain, or higher-level, the same domain is returned.
136func (l List) Lookup(ctx context.Context, elog *slog.Logger, domain dns.Domain) (orgDomain dns.Domain) {
137 log := mlog.New("publicsuffix", elog)
138 defer func() {
139 log.Debug("publicsuffix lookup result", slog.Any("reqdom", domain), slog.Any("orgdom", orgDomain))
140 }()
141
142 t := strings.Split(domain.Name(), ".")
143
144 var n int
145 if nexcl, ok := match(l.excludes, t); ok {
146 n = nexcl
147 } else if nincl, ok := match(l.includes, t); ok {
148 n = nincl + 1
149 } else {
150 n = 2
151 }
152 if len(t) < n {
153 return domain
154 }
155 name := strings.Join(t[len(t)-n:], ".")
156 if isASCII(name) {
157 return dns.Domain{ASCII: name}
158 }
159 t = strings.Split(domain.ASCII, ".")
160 ascii := strings.Join(t[len(t)-n:], ".")
161 return dns.Domain{ASCII: ascii, Unicode: name}
162}
163
// isASCII returns whether s consists only of bytes below 0x80. The empty
// string is ASCII.
func isASCII(s string) bool {
	for i := 0; i < len(s); i++ {
		if s[i] >= 0x80 {
			return false
		}
	}
	return true
}
172
173func match(l labels, t []string) (int, bool) {
174 if len(t) == 0 {
175 _, ok := l[""]
176 return 0, ok
177 }
178 s := t[len(t)-1]
179 t = t[:len(t)-1]
180 n := 0
181 if m, mok := l[s]; mok {
182 if nn, sok := match(m, t); sok {
183 n = 1 + nn
184 }
185 }
186 if m, mok := l["*"]; mok {
187 if nn, sok := match(m, t); sok && nn >= n {
188 n = 1 + nn
189 }
190 }
191 _, mok := l[""]
192 return n, n > 0 || mok
193}
194