sanitize_test.gno
16.46 Kb · 464 lines
1package sanitize
2
3import (
4 "strings"
5 "testing"
6)
7
8func TestStripBidiAndZeroWidthRe(t *testing.T) {
9 // Re-export sanity check.
10 if got := StripBidiAndZeroWidth("a\u200Bb"); got != "ab" {
11 t.Errorf("re-export StripBidiAndZeroWidth failed: got %q", got)
12 }
13}
14
15func TestNormalizeBreaksRe(t *testing.T) {
16 if got := NormalizeBreaks("a\r\nb"); got != "a\nb" {
17 t.Errorf("re-export NormalizeBreaks failed: got %q", got)
18 }
19}
20
21func TestInlineText(t *testing.T) {
22 cases := []struct{ in, want string }{
23 {"plain", "plain"},
24 {"a*b", `a\*b`},
25 {"a|b", "a|b"}, // | NOT escaped
26 {"a=b", "a=b"}, // = NOT escaped
27 {"line1\nline2", "line1 line2"}, // newline folded
28 {"line1\r\nline2", "line1 line2"}, // CRLF normalized then folded
29 {"a\u2028b", "a b"}, // U+2028 folded
30 {"a\u0085b", "a b"}, // NEL folded
31 {"a\u200Bb", "ab"}, // ZWSP stripped
32 {"hello *world*", `hello \*world\*`},
33 }
34 for _, c := range cases {
35 if got := InlineText(c.in); got != c.want {
36 t.Errorf("InlineText(%q) = %q, want %q", c.in, got, c.want)
37 }
38 }
39}
40
41func TestBlock(t *testing.T) {
42 // Block wraps every non-empty output with "\n\n" — CM §4.8 blank
43 // lines — on BOTH sides, so user content is paragraph-isolated
44 // from any realm chrome that precedes OR follows it. Bounds CM
45 // §4.6 HTML block types 6/7 (`<div>`, `<table>`, …) which are not
46 // escaped in any mode and would otherwise consume appended chrome.
47 cases := []struct{ in, want string }{
48 {"hello world\n", "\n\nhello world\n\n"},
49 {"# heading\n", "\n\n\\# heading\n\n"},
50 {"> quote\n", "\n\n\\> quote\n\n"},
51 {"| a | b |\n", "\n\n\\| a | b |\n\n"}, // GFM table-row escaped in strict mode
52 // CM §4.6 HTML block types 1-5 — escaped (blank-line-NON-terminating).
53 {"<script>x</script>\n", "\n\n\\<script>x</script>\n\n"},
54 {"<!-- comment -->\n", "\n\n\\<!-- comment -->\n\n"},
55 {"<?php x ?>\n", "\n\n\\<?php x ?>\n\n"},
56 {"<!DOCTYPE html>\n", "\n\n\\<!DOCTYPE html>\n\n"},
57 {"```\ncode\n", "\n\n```\ncode\n```\n\n"}, // fence auto-close at EOF
58 // Empty / strip-to-empty inputs short-circuit (no stray blank line).
59 {"[x]: y\n", ""},
60 {"", ""},
61 }
62 for _, c := range cases {
63 got := Block(c.in)
64 if got != c.want {
65 t.Errorf("Block(%q) = %q, want %q", c.in, got, c.want)
66 }
67 // Idempotency: Block(Block(in)) must be byte-identical to
68 // Block(in) for every input in the table.
69 twice := Block(got)
70 if twice != got {
71 t.Errorf("Block not idempotent for %q: Block(once)=%q, Block(twice)=%q", c.in, got, twice)
72 }
73 }
74}
75
76func TestBlockRich(t *testing.T) {
77 // BlockRich wraps every non-empty output with "\n\n" — CM §4.8
78 // blank lines — on BOTH sides, so user content is paragraph-
79 // isolated from any realm chrome that precedes OR follows it.
80 cases := []struct{ in, want string }{
81 // Block-level markdown that Block escapes — preserved by BlockRich.
82 {"hello world\n", "\n\nhello world\n\n"},
83 {"# heading\n", "\n\n# heading\n\n"},
84 {"> quote\n", "\n\n> quote\n\n"},
85 {"- item\n", "\n\n- item\n\n"},
86 {"1. item\n", "\n\n1. item\n\n"},
87 {"---\n", "\n\n\\---\n\n"}, // first-line setext-h2 → escaped by qualifying-setext pre-pass
88 {"***\n", "\n\n***\n\n"}, // thematic break NOT setext-h2 — preserved
89 {"text\n===\n", "\n\ntext\n===\n\n"}, // deeper setext preserved (user-authored above)
90 {"body\n===\nmore\n", "\n\nbody\n===\nmore\n\n"}, // cross-paragraph backward attack: realm `chrome\n\nbody\n===\nmore` keeps realm in its own paragraph
91 // GFM tables PRESERVED in Rich mode.
92 {"| a | b |\n", "\n\n| a | b |\n\n"},
93 {"| H |\n|---|\n| a |\n", "\n\n| H |\n|---|\n| a |\n\n"}, // full table renders as <table>
94 // Realm-binding defenses STILL ON.
95 {"<gno-card>\n", "\n\n\\<gno-card>\n\n"},
96 // CM §4.6 HTML block types 1-5 — escaped in Rich mode too
97 // (defense is mode-independent).
98 {"<script>x</script>\n", "\n\n\\<script>x</script>\n\n"},
99 {"<!-- comment -->\n", "\n\n\\<!-- comment -->\n\n"},
100 {"<?php x ?>\n", "\n\n\\<?php x ?>\n\n"},
101 {"<!DOCTYPE html>\n", "\n\n\\<!DOCTYPE html>\n\n"},
102 {"[t][l]\n", "\n\n\\[t\\]\\[l\\]\n\n"},
103 {"[^name]\n", "\n\n\\[^name\\]\n\n"},
104 // LRD-only input strips to nothing; empty-after-escape short-
105 // circuits to "" so realm concatenation doesn't leak a stray
106 // blank line.
107 {"[x]: y\n", ""},
108 {"", ""}, // empty input → empty output, no stray blank line
109 {"```\ncode\n", "\n\n```\ncode\n```\n\n"},
110 }
111 for _, c := range cases {
112 got := BlockRich(c.in)
113 if got != c.want {
114 t.Errorf("BlockRich(%q) = %q, want %q", c.in, got, c.want)
115 }
116 // Idempotency: BlockRich(BlockRich(in)) must be byte-identical
117 // to BlockRich(in) for every input in the table.
118 twice := BlockRich(got)
119 if twice != got {
120 t.Errorf("BlockRich not idempotent for %q: BlockRich(once)=%q, BlockRich(twice)=%q", c.in, got, twice)
121 }
122 }
123}
124
125func TestBlockRich_LeadingBlankLineGuaranteed(t *testing.T) {
126 // Every non-empty result begins with "\n\n" so `chrome +
127 // BlockRich(user)` cannot place the user's first line in the same
128 // paragraph as the realm's last line (which would let a deeper
129 // `===` or `|---|` in user content retroactively promote realm
130 // chrome).
131 for _, in := range []string{"x", "x\n", "# h", "- i\n- j", "| a |\n|---|\n| 1 |"} {
132 got := BlockRich(in)
133 if got == "" {
134 continue
135 }
136 if !strings.HasPrefix(got, "\n\n") {
137 t.Errorf("BlockRich(%q) = %q; expected leading '\\n\\n'", in, got)
138 }
139 }
140}
141
142func TestBlockRich_TrailingBlankLineGuaranteed(t *testing.T) {
143 // Every non-empty result ends with "\n\n" so `BlockRich(user) +
144 // chrome` cannot extend user's last paragraph into the realm's
145 // next line (CM §5.2 lazy continuation, or a realm-supplied
146 // `|---|` row retroactively promoting user's last line into a
147 // `<thead>` header).
148 for _, in := range []string{"x", "x\n", "# h", "- i\n- j", "| H |\n|---|\n| a |"} {
149 got := BlockRich(in)
150 if got == "" {
151 continue
152 }
153 if !strings.HasSuffix(got, "\n\n") {
154 t.Errorf("BlockRich(%q) = %q; expected trailing '\\n\\n'", in, got)
155 }
156 }
157}
158
159func TestBlockquoteRich(t *testing.T) {
160 // BlockquoteRich strips BlockRich's leading "\n", line-prefixes
161 // each remaining line with "> ", and emits "\n" on both ends:
162 // leading "\n" prevents `chrome + BlockquoteRich(user)` from
163 // landing the first `>` mid-line; trailing "\n\n" (blank line)
164 // prevents `BlockquoteRich(user) + chrome` from pulling chrome
165 // into the quote via CM §5.2 lazy continuation.
166 cases := []struct{ name, in, want string }{
167 {"plain text", "hello world\n", "\n> hello world\n\n"},
168 {"atx heading preserved", "# heading\n", "\n> # heading\n\n"},
169 {"nested blockquote", "> nested\n", "\n> > nested\n\n"},
170 {"list item preserved", "- item\n", "\n> - item\n\n"},
171 {"ordered list preserved", "1. item\n", "\n> 1. item\n\n"},
172 {"first-line setext escaped", "---\n", "\n> \\---\n\n"},
173 {"deeper setext preserved", "text\n===\n", "\n> text\n> ===\n\n"},
174 {"thematic break asterisks preserved", "***\n", "\n> ***\n\n"},
175 // Realm-binding defenses still on.
176 {"gno extension escaped", "<gno-card>\n", "\n> \\<gno-card>\n\n"},
177 {"ref-link use escaped", "[t][l]\n", "\n> \\[t\\]\\[l\\]\n\n"},
178 {"footnote escaped", "[^name]\n", "\n> \\[^name\\]\n\n"},
179 // LRDs are stripped by BlockRich entirely; trimming the
180 // resulting bare "\n" leaves empty input and the helper
181 // returns "" without emitting a blockquote.
182 {"lrd alone strips to empty", "[x]: y\n", ""},
183 // Code fence autoclose at EOF lands inside the quote.
184 {"unclosed fence autoclosed", "```\ncode\n", "\n> ```\n> code\n> ```\n\n"},
185 // Multi-paragraph preserves the inner blank line (rendered
186 // as a quoted blank line `> \n`).
187 {"multi-paragraph preserves blank", "a\n\nb\n", "\n> a\n> \n> b\n\n"},
188 // Empty / blank-only input collapses cleanly to "".
189 {"empty input", "", ""},
190 // CRLF normalized through BlockRich.
191 {"crlf normalized", "a\r\nb\n", "\n> a\n> b\n\n"},
192 // NUL replaced with U+FFFD by BlockRich.
193 {"nul replaced", "x\x00y\n", "\n> x\uFFFDy\n\n"},
194 // Multiple trailing newlines collapse to the single trailing
195 // blank line shape — output never ends with more than `\n\n`.
196 {"multiple trailing newlines collapse", "foo\n\n\n", "\n> foo\n\n"},
197 // Internal tabs are preserved (BlockRich does not touch them).
198 {"internal tab preserved", "a\tb\n", "\n> a\tb\n\n"},
199 // Whitespace-only input still yields a blockquote (the user
200 // wrote literal spaces). The line-prefix loop emits `> ` plus
201 // the original two spaces.
202 {"whitespace-only input", " ", "\n> \n\n"},
203 }
204 for _, c := range cases {
205 got := BlockquoteRich(c.in)
206 if got != c.want {
207 t.Errorf("%s: BlockquoteRich(%q) = %q, want %q", c.name, c.in, got, c.want)
208 }
209 }
210}
211
212func TestBlockquoteRich_DoubleWrapNestsQuote(t *testing.T) {
213 // Not idempotent: calling twice nests the quote one level
214 // deeper. The aggressive TrimRight in BlockquoteRich also
215 // collapses the inner trailing blank line, so the second pass
216 // produces a clean `> > foo` nesting (no `> ` quoted blank in
217 // the middle) plus the outer trailing blank line.
218 once := BlockquoteRich("foo\n")
219 twice := BlockquoteRich(once)
220 if want := "\n> > foo\n\n"; twice != want {
221 t.Errorf("BlockquoteRich(BlockquoteRich(%q)) = %q, want %q", "foo\n", twice, want)
222 }
223}
224
225func TestBlockquoteRich_LeadingNewlineGuaranteed(t *testing.T) {
226 // Every non-empty result starts with "\n" so realm concatenation
227 // like `chrome + BlockquoteRich(user)` doesn't place `>` mid-line.
228 for _, in := range []string{"x", "x\n", "# h", "- i\n- j"} {
229 got := BlockquoteRich(in)
230 if got == "" {
231 continue
232 }
233 if got[0] != '\n' {
234 t.Errorf("BlockquoteRich(%q) = %q; expected leading '\\n'", in, got)
235 }
236 }
237}
238
239func TestBlockquoteRich_TrailingBlankLineGuaranteed(t *testing.T) {
240 // Every non-empty result ends with "\n\n" so realm concatenation
241 // like `BlockquoteRich(user) + "more text"` doesn't pull "more
242 // text" into the quote via CM §5.2 lazy continuation.
243 for _, in := range []string{"x", "x\n", "# h", "- i\n- j", "para\n\nmore"} {
244 got := BlockquoteRich(in)
245 if got == "" {
246 continue
247 }
248 if !strings.HasSuffix(got, "\n\n") {
249 t.Errorf("BlockquoteRich(%q) = %q; expected trailing '\\n\\n'", in, got)
250 }
251 }
252}
253
254func TestBlockquoteRich_NoStrayEmptyQuotedLine(t *testing.T) {
255 // BlockRich's own leading "\n" must be stripped before
256 // line-prefixing — otherwise the output would start with a
257 // useless `> \n` empty quoted line.
258 got := BlockquoteRich("foo\n")
259 if strings.HasPrefix(got, "\n> \n") {
260 t.Errorf("BlockquoteRich leaked BlockRich's leading '\\n' as `> \\n`: %q", got)
261 }
262}
263
264func TestNeuterLeadingSetextIfQualifying(t *testing.T) {
265 cases := []struct{ name, in, want string }{
266 {"leading-setext-h1", "===\nbody\n", "\\===\nbody\n"},
267 {"leading-setext-h2", "---\nbody\n", "\\---\nbody\n"},
268 {"leading-setext-indented", " ===\nbody\n", " \\===\nbody\n"},
269 {"leading-setext-indented-4", " ===\nbody\n", " ===\nbody\n"}, // indented code
270 {"leading-setext-trailing-ws", "=== \nbody\n", "\\=== \nbody\n"},
271 {"leading-setext-mixed-chars", "=-=-\nbody\n", "=-=-\nbody\n"}, // mixed; not setext
272 {"leading-setext-has-content", "=== text\nbody\n", "=== text\nbody\n"}, // mixed content
273 {"setext-deeper-untouched", "title\n===\nfoo\n", "title\n===\nfoo\n"},
274 {"leading-thematic-asterisk", "***\nbody\n", "***\nbody\n"}, // not setext, untouched
275 {"leading-thematic-underscore", "___\nbody\n", "___\nbody\n"},
276 {"leading-blank-then-setext", "\n===\nbody\n", "\n\\===\nbody\n"},
277 {"leading-blank-ws-then-setext", " \n===\nbody\n", " \n\\===\nbody\n"},
278 {"text-first", "hello\n===\n", "hello\n===\n"}, // text before === — user-authored
279 {"empty", "", ""},
280 {"all-blank", " \n\n", " \n\n"},
281 {"only-equals", "===", "\\==="},
282 }
283 for _, c := range cases {
284 if got := neuterLeadingSetextIfQualifying(c.in); got != c.want {
285 t.Errorf("%s: neuterLeadingSetextIfQualifying(%q) = %q, want %q", c.name, c.in, got, c.want)
286 }
287 }
288}
289
290func TestLinkTitle(t *testing.T) {
291 cases := []struct{ in, want string }{
292 {`he said "hi"`, `he said \"hi\"`},
293 {`it's nice`, `it\'s nice`},
294 {"line1\nline2", "line1 line2"},
295 }
296 for _, c := range cases {
297 if got := LinkTitle(c.in); got != c.want {
298 t.Errorf("LinkTitle(%q) = %q, want %q", c.in, got, c.want)
299 }
300 }
301}
302
303func TestTableCell(t *testing.T) {
304 cases := []struct{ in, want string }{
305 {"plain cell", "plain cell"},
306 {"a|b", `a\|b`},
307 {"a\tb", "a b"},
308 {"a*b|c", `a\*b\|c`},
309 }
310 for _, c := range cases {
311 if got := TableCell(c.in); got != c.want {
312 t.Errorf("TableCell(%q) = %q, want %q", c.in, got, c.want)
313 }
314 }
315}
316
317func TestHTMLEscape(t *testing.T) {
318 cases := []struct{ in, want string }{
319 {"plain", "plain"},
320 {"<script>", "<script>"},
321 {`a & b`, "a & b"},
322 {`"quoted"`, ""quoted""},
323 }
324 for _, c := range cases {
325 if got := HTMLEscape(c.in); got != c.want {
326 t.Errorf("HTMLEscape(%q) = %q, want %q", c.in, got, c.want)
327 }
328 }
329}
330
331func TestURL(t *testing.T) {
332 cases := []struct{ in, want string }{
333 {"https://example.com/x", "https://example.com/x"},
334 {"http://example.com", "http://example.com"},
335 {"mailto:a@b.com", "mailto:a@b.com"},
336 {"mailto:a@b.com?body=phish", ""}, // prefill phishing rejected
337 {"//evil.com", ""}, // protocol-relative rejected
338 {"javascript:alert(1)", ""}, // bad scheme
339 {"/r/foo", "/r/foo"}, // relative
340 {"./local", "./local"},
341 {"#section", "#section"}, // fragment-only
342 {"", ""},
343 {" ", ""},
344 {"https://a.com/path with space", "https://a.com/path%20with%20space"},
345 }
346 for _, c := range cases {
347 if got := URL(c.in); got != c.want {
348 t.Errorf("URL(%q) = %q, want %q", c.in, got, c.want)
349 }
350 }
351}
352
353func TestImageURL(t *testing.T) {
354 cases := []struct{ in, want string }{
355 {"https://example.com/img.png", "https://example.com/img.png"},
356 {"mailto:a@b.com", ""}, // mailto rejected for images
357 {"data:image/svg+xml,<svg/>", "data:image/svg+xml,%3Csvg/%3E"},
358 {"data:image/png;base64,XXX", "data:image/png;base64,XXX"},
359 {"data:text/html,<script>", ""}, // bad data subset
360 {"javascript:alert(1)", ""},
361 {"", ""},
362 }
363 for _, c := range cases {
364 if got := ImageURL(c.in); got != c.want {
365 t.Errorf("ImageURL(%q) = %q, want %q", c.in, got, c.want)
366 }
367 }
368}
369
370func TestUserName(t *testing.T) {
371 cases := []struct{ in, want string }{
372 {"alice", "alice"},
373 {"alice123", "alice123"},
374 {"alice_bob-cat", "alice_bob-cat"},
375 {"Alice", ""}, // uppercase first
376 {"1alice", ""}, // digit first
377 {"", ""},
378 {"a\u200Blice", "alice"}, // bidi stripped, then matches
379 }
380 for _, c := range cases {
381 if got := UserName(c.in); got != c.want {
382 t.Errorf("UserName(%q) = %q, want %q", c.in, got, c.want)
383 }
384 }
385}
386
387func TestBechString(t *testing.T) {
388 addrG := "g1abc123def456ghi789jkl012mno345p"
389 cases := []struct {
390 s, prefix string
391 want string
392 }{
393 {addrG, "g", addrG},
394 {addrG, "", addrG}, // any prefix
395 {addrG, "gpub", ""}, // wrong prefix
396 {"gpub1abc123def456ghijklmn", "gpub", "gpub1abc123def456ghijklmn"},
397 {"gpub1abc123def456ghijklmn", "", "gpub1abc123def456ghijklmn"},
398 {"b1xyz789abc123def456", "", "b1xyz789abc123def456"}, // any-prefix mode allows b1...
399 {"g1ABC", "g", ""}, // uppercase rejected
400 {"x", "g", ""},
401 {"", "g", ""},
402 }
403 for _, c := range cases {
404 if got := BechString(c.s, c.prefix); got != c.want {
405 t.Errorf("BechString(%q,%q) = %q, want %q", c.s, c.prefix, got, c.want)
406 }
407 }
408}
409
410func TestFootnoteLabel(t *testing.T) {
411 cases := []struct{ in, want string }{
412 {"note1", "note1"},
413 {"Note_A-1", "Note_A-1"},
414 {"with space", ""},
415 {"", ""},
416 }
417 for _, c := range cases {
418 if got := FootnoteLabel(c.in); got != c.want {
419 t.Errorf("FootnoteLabel(%q) = %q, want %q", c.in, got, c.want)
420 }
421 }
422}
423
424func TestLanguageName(t *testing.T) {
425 cases := []struct{ in, want string }{
426 {"go", "go"},
427 {"c++", "c++"},
428 {"python3", "python3"},
429 {"objective-c", "objective-c"},
430 {"with space", ""},
431 {"", ""},
432 }
433 for _, c := range cases {
434 if got := LanguageName(c.in); got != c.want {
435 t.Errorf("LanguageName(%q) = %q, want %q", c.in, got, c.want)
436 }
437 }
438}
439
440func TestNestedPrefix(t *testing.T) {
441 cases := []struct{ in, want string }{
442 {"", ""},
443 {" ", " "},
444 {"\t", "\t"},
445 {"> ", "> "},
446 {"> > ", "> > "},
447 {"## ", ""}, // markdown-active prefix rejected
448 {"- ", ""},
449 }
450 for _, c := range cases {
451 if got := NestedPrefix(c.in); got != c.want {
452 t.Errorf("NestedPrefix(%q) = %q, want %q", c.in, got, c.want)
453 }
454 }
455}
456
457func TestCodeFence(t *testing.T) {
458 if got := CodeFence("```", 3); got != "````" {
459 t.Errorf("CodeFence: got %q, want %q", got, "````")
460 }
461 if got := CodeFence("", 3); got != "```" {
462 t.Errorf("CodeFence empty: got %q, want %q", got, "```")
463 }
464}