Search Apps Documentation Source Content File Folder Download Copy Actions Download

sanitize_test.gno

16.46 Kb · 464 lines
  1package sanitize
  2
  3import (
  4	"strings"
  5	"testing"
  6)
  7
  8func TestStripBidiAndZeroWidthRe(t *testing.T) {
  9	// Re-export sanity check.
 10	if got := StripBidiAndZeroWidth("a\u200Bb"); got != "ab" {
 11		t.Errorf("re-export StripBidiAndZeroWidth failed: got %q", got)
 12	}
 13}
 14
 15func TestNormalizeBreaksRe(t *testing.T) {
 16	if got := NormalizeBreaks("a\r\nb"); got != "a\nb" {
 17		t.Errorf("re-export NormalizeBreaks failed: got %q", got)
 18	}
 19}
 20
 21func TestInlineText(t *testing.T) {
 22	cases := []struct{ in, want string }{
 23		{"plain", "plain"},
 24		{"a*b", `a\*b`},
 25		{"a|b", "a|b"},                    // | NOT escaped
 26		{"a=b", "a=b"},                    // = NOT escaped
 27		{"line1\nline2", "line1 line2"},   // newline folded
 28		{"line1\r\nline2", "line1 line2"}, // CRLF normalized then folded
 29		{"a\u2028b", "a b"},               // U+2028 folded
 30		{"a\u0085b", "a b"},               // NEL folded
 31		{"a\u200Bb", "ab"},                // ZWSP stripped
 32		{"hello *world*", `hello \*world\*`},
 33	}
 34	for _, c := range cases {
 35		if got := InlineText(c.in); got != c.want {
 36			t.Errorf("InlineText(%q) = %q, want %q", c.in, got, c.want)
 37		}
 38	}
 39}
 40
 41func TestBlock(t *testing.T) {
 42	// Block wraps every non-empty output with "\n\n" — CM §4.8 blank
 43	// lines — on BOTH sides, so user content is paragraph-isolated
 44	// from any realm chrome that precedes OR follows it. Bounds CM
 45	// §4.6 HTML block types 6/7 (`<div>`, `<table>`, …) which are not
 46	// escaped in any mode and would otherwise consume appended chrome.
 47	cases := []struct{ in, want string }{
 48		{"hello world\n", "\n\nhello world\n\n"},
 49		{"# heading\n", "\n\n\\# heading\n\n"},
 50		{"> quote\n", "\n\n\\> quote\n\n"},
 51		{"| a | b |\n", "\n\n\\| a | b |\n\n"}, // GFM table-row escaped in strict mode
 52		// CM §4.6 HTML block types 1-5 — escaped (blank-line-NON-terminating).
 53		{"<script>x</script>\n", "\n\n\\<script>x</script>\n\n"},
 54		{"<!-- comment -->\n", "\n\n\\<!-- comment -->\n\n"},
 55		{"<?php x ?>\n", "\n\n\\<?php x ?>\n\n"},
 56		{"<!DOCTYPE html>\n", "\n\n\\<!DOCTYPE html>\n\n"},
 57		{"```\ncode\n", "\n\n```\ncode\n```\n\n"}, // fence auto-close at EOF
 58		// Empty / strip-to-empty inputs short-circuit (no stray blank line).
 59		{"[x]: y\n", ""},
 60		{"", ""},
 61	}
 62	for _, c := range cases {
 63		got := Block(c.in)
 64		if got != c.want {
 65			t.Errorf("Block(%q) = %q, want %q", c.in, got, c.want)
 66		}
 67		// Idempotency: Block(Block(in)) must be byte-identical to
 68		// Block(in) for every input in the table.
 69		twice := Block(got)
 70		if twice != got {
 71			t.Errorf("Block not idempotent for %q: Block(once)=%q, Block(twice)=%q", c.in, got, twice)
 72		}
 73	}
 74}
 75
 76func TestBlockRich(t *testing.T) {
 77	// BlockRich wraps every non-empty output with "\n\n" — CM §4.8
 78	// blank lines — on BOTH sides, so user content is paragraph-
 79	// isolated from any realm chrome that precedes OR follows it.
 80	cases := []struct{ in, want string }{
 81		// Block-level markdown that Block escapes — preserved by BlockRich.
 82		{"hello world\n", "\n\nhello world\n\n"},
 83		{"# heading\n", "\n\n# heading\n\n"},
 84		{"> quote\n", "\n\n> quote\n\n"},
 85		{"- item\n", "\n\n- item\n\n"},
 86		{"1. item\n", "\n\n1. item\n\n"},
 87		{"---\n", "\n\n\\---\n\n"},                       // first-line setext-h2 → escaped by qualifying-setext pre-pass
 88		{"***\n", "\n\n***\n\n"},                         // thematic break NOT setext-h2 — preserved
 89		{"text\n===\n", "\n\ntext\n===\n\n"},             // deeper setext preserved (user-authored above)
 90		{"body\n===\nmore\n", "\n\nbody\n===\nmore\n\n"}, // cross-paragraph backward attack: realm `chrome\n\nbody\n===\nmore` keeps realm in its own paragraph
 91		// GFM tables PRESERVED in Rich mode.
 92		{"| a | b |\n", "\n\n| a | b |\n\n"},
 93		{"| H |\n|---|\n| a |\n", "\n\n| H |\n|---|\n| a |\n\n"}, // full table renders as <table>
 94		// Realm-binding defenses STILL ON.
 95		{"<gno-card>\n", "\n\n\\<gno-card>\n\n"},
 96		// CM §4.6 HTML block types 1-5 — escaped in Rich mode too
 97		// (defense is mode-independent).
 98		{"<script>x</script>\n", "\n\n\\<script>x</script>\n\n"},
 99		{"<!-- comment -->\n", "\n\n\\<!-- comment -->\n\n"},
100		{"<?php x ?>\n", "\n\n\\<?php x ?>\n\n"},
101		{"<!DOCTYPE html>\n", "\n\n\\<!DOCTYPE html>\n\n"},
102		{"[t][l]\n", "\n\n\\[t\\]\\[l\\]\n\n"},
103		{"[^name]\n", "\n\n\\[^name\\]\n\n"},
104		// LRD-only input strips to nothing; empty-after-escape short-
105		// circuits to "" so realm concatenation doesn't leak a stray
106		// blank line.
107		{"[x]: y\n", ""},
108		{"", ""}, // empty input → empty output, no stray blank line
109		{"```\ncode\n", "\n\n```\ncode\n```\n\n"},
110	}
111	for _, c := range cases {
112		got := BlockRich(c.in)
113		if got != c.want {
114			t.Errorf("BlockRich(%q) = %q, want %q", c.in, got, c.want)
115		}
116		// Idempotency: BlockRich(BlockRich(in)) must be byte-identical
117		// to BlockRich(in) for every input in the table.
118		twice := BlockRich(got)
119		if twice != got {
120			t.Errorf("BlockRich not idempotent for %q: BlockRich(once)=%q, BlockRich(twice)=%q", c.in, got, twice)
121		}
122	}
123}
124
125func TestBlockRich_LeadingBlankLineGuaranteed(t *testing.T) {
126	// Every non-empty result begins with "\n\n" so `chrome +
127	// BlockRich(user)` cannot place the user's first line in the same
128	// paragraph as the realm's last line (which would let a deeper
129	// `===` or `|---|` in user content retroactively promote realm
130	// chrome).
131	for _, in := range []string{"x", "x\n", "# h", "- i\n- j", "| a |\n|---|\n| 1 |"} {
132		got := BlockRich(in)
133		if got == "" {
134			continue
135		}
136		if !strings.HasPrefix(got, "\n\n") {
137			t.Errorf("BlockRich(%q) = %q; expected leading '\\n\\n'", in, got)
138		}
139	}
140}
141
142func TestBlockRich_TrailingBlankLineGuaranteed(t *testing.T) {
143	// Every non-empty result ends with "\n\n" so `BlockRich(user) +
144	// chrome` cannot extend user's last paragraph into the realm's
145	// next line (CM §5.2 lazy continuation, or a realm-supplied
146	// `|---|` row retroactively promoting user's last line into a
147	// `<thead>` header).
148	for _, in := range []string{"x", "x\n", "# h", "- i\n- j", "| H |\n|---|\n| a |"} {
149		got := BlockRich(in)
150		if got == "" {
151			continue
152		}
153		if !strings.HasSuffix(got, "\n\n") {
154			t.Errorf("BlockRich(%q) = %q; expected trailing '\\n\\n'", in, got)
155		}
156	}
157}
158
159func TestBlockquoteRich(t *testing.T) {
160	// BlockquoteRich strips BlockRich's leading "\n", line-prefixes
161	// each remaining line with "> ", and emits "\n" on both ends:
162	// leading "\n" prevents `chrome + BlockquoteRich(user)` from
163	// landing the first `>` mid-line; trailing "\n\n" (blank line)
164	// prevents `BlockquoteRich(user) + chrome` from pulling chrome
165	// into the quote via CM §5.2 lazy continuation.
166	cases := []struct{ name, in, want string }{
167		{"plain text", "hello world\n", "\n> hello world\n\n"},
168		{"atx heading preserved", "# heading\n", "\n> # heading\n\n"},
169		{"nested blockquote", "> nested\n", "\n> > nested\n\n"},
170		{"list item preserved", "- item\n", "\n> - item\n\n"},
171		{"ordered list preserved", "1. item\n", "\n> 1. item\n\n"},
172		{"first-line setext escaped", "---\n", "\n> \\---\n\n"},
173		{"deeper setext preserved", "text\n===\n", "\n> text\n> ===\n\n"},
174		{"thematic break asterisks preserved", "***\n", "\n> ***\n\n"},
175		// Realm-binding defenses still on.
176		{"gno extension escaped", "<gno-card>\n", "\n> \\<gno-card>\n\n"},
177		{"ref-link use escaped", "[t][l]\n", "\n> \\[t\\]\\[l\\]\n\n"},
178		{"footnote escaped", "[^name]\n", "\n> \\[^name\\]\n\n"},
179		// LRDs are stripped by BlockRich entirely; trimming the
180		// resulting bare "\n" leaves empty input and the helper
181		// returns "" without emitting a blockquote.
182		{"lrd alone strips to empty", "[x]: y\n", ""},
183		// Code fence autoclose at EOF lands inside the quote.
184		{"unclosed fence autoclosed", "```\ncode\n", "\n> ```\n> code\n> ```\n\n"},
185		// Multi-paragraph preserves the inner blank line (rendered
186		// as a quoted blank line `> \n`).
187		{"multi-paragraph preserves blank", "a\n\nb\n", "\n> a\n> \n> b\n\n"},
188		// Empty / blank-only input collapses cleanly to "".
189		{"empty input", "", ""},
190		// CRLF normalized through BlockRich.
191		{"crlf normalized", "a\r\nb\n", "\n> a\n> b\n\n"},
192		// NUL replaced with U+FFFD by BlockRich.
193		{"nul replaced", "x\x00y\n", "\n> x\uFFFDy\n\n"},
194		// Multiple trailing newlines collapse to the single trailing
195		// blank line shape — output never ends with more than `\n\n`.
196		{"multiple trailing newlines collapse", "foo\n\n\n", "\n> foo\n\n"},
197		// Internal tabs are preserved (BlockRich does not touch them).
198		{"internal tab preserved", "a\tb\n", "\n> a\tb\n\n"},
199		// Whitespace-only input still yields a blockquote (the user
200		// wrote literal spaces). The line-prefix loop emits `> ` plus
201		// the original two spaces.
202		{"whitespace-only input", "  ", "\n>   \n\n"},
203	}
204	for _, c := range cases {
205		got := BlockquoteRich(c.in)
206		if got != c.want {
207			t.Errorf("%s: BlockquoteRich(%q) = %q, want %q", c.name, c.in, got, c.want)
208		}
209	}
210}
211
212func TestBlockquoteRich_DoubleWrapNestsQuote(t *testing.T) {
213	// Not idempotent: calling twice nests the quote one level
214	// deeper. The aggressive TrimRight in BlockquoteRich also
215	// collapses the inner trailing blank line, so the second pass
216	// produces a clean `> > foo` nesting (no `> ` quoted blank in
217	// the middle) plus the outer trailing blank line.
218	once := BlockquoteRich("foo\n")
219	twice := BlockquoteRich(once)
220	if want := "\n> > foo\n\n"; twice != want {
221		t.Errorf("BlockquoteRich(BlockquoteRich(%q)) = %q, want %q", "foo\n", twice, want)
222	}
223}
224
225func TestBlockquoteRich_LeadingNewlineGuaranteed(t *testing.T) {
226	// Every non-empty result starts with "\n" so realm concatenation
227	// like `chrome + BlockquoteRich(user)` doesn't place `>` mid-line.
228	for _, in := range []string{"x", "x\n", "# h", "- i\n- j"} {
229		got := BlockquoteRich(in)
230		if got == "" {
231			continue
232		}
233		if got[0] != '\n' {
234			t.Errorf("BlockquoteRich(%q) = %q; expected leading '\\n'", in, got)
235		}
236	}
237}
238
239func TestBlockquoteRich_TrailingBlankLineGuaranteed(t *testing.T) {
240	// Every non-empty result ends with "\n\n" so realm concatenation
241	// like `BlockquoteRich(user) + "more text"` doesn't pull "more
242	// text" into the quote via CM §5.2 lazy continuation.
243	for _, in := range []string{"x", "x\n", "# h", "- i\n- j", "para\n\nmore"} {
244		got := BlockquoteRich(in)
245		if got == "" {
246			continue
247		}
248		if !strings.HasSuffix(got, "\n\n") {
249			t.Errorf("BlockquoteRich(%q) = %q; expected trailing '\\n\\n'", in, got)
250		}
251	}
252}
253
254func TestBlockquoteRich_NoStrayEmptyQuotedLine(t *testing.T) {
255	// BlockRich's own leading "\n" must be stripped before
256	// line-prefixing — otherwise the output would start with a
257	// useless `> \n` empty quoted line.
258	got := BlockquoteRich("foo\n")
259	if strings.HasPrefix(got, "\n> \n") {
260		t.Errorf("BlockquoteRich leaked BlockRich's leading '\\n' as `> \\n`: %q", got)
261	}
262}
263
264func TestNeuterLeadingSetextIfQualifying(t *testing.T) {
265	cases := []struct{ name, in, want string }{
266		{"leading-setext-h1", "===\nbody\n", "\\===\nbody\n"},
267		{"leading-setext-h2", "---\nbody\n", "\\---\nbody\n"},
268		{"leading-setext-indented", "   ===\nbody\n", "   \\===\nbody\n"},
269		{"leading-setext-indented-4", "    ===\nbody\n", "    ===\nbody\n"}, // indented code
270		{"leading-setext-trailing-ws", "===   \nbody\n", "\\===   \nbody\n"},
271		{"leading-setext-mixed-chars", "=-=-\nbody\n", "=-=-\nbody\n"},         // mixed; not setext
272		{"leading-setext-has-content", "=== text\nbody\n", "=== text\nbody\n"}, // mixed content
273		{"setext-deeper-untouched", "title\n===\nfoo\n", "title\n===\nfoo\n"},
274		{"leading-thematic-asterisk", "***\nbody\n", "***\nbody\n"}, // not setext, untouched
275		{"leading-thematic-underscore", "___\nbody\n", "___\nbody\n"},
276		{"leading-blank-then-setext", "\n===\nbody\n", "\n\\===\nbody\n"},
277		{"leading-blank-ws-then-setext", "   \n===\nbody\n", "   \n\\===\nbody\n"},
278		{"text-first", "hello\n===\n", "hello\n===\n"}, // text before === — user-authored
279		{"empty", "", ""},
280		{"all-blank", "   \n\n", "   \n\n"},
281		{"only-equals", "===", "\\==="},
282	}
283	for _, c := range cases {
284		if got := neuterLeadingSetextIfQualifying(c.in); got != c.want {
285			t.Errorf("%s: neuterLeadingSetextIfQualifying(%q) = %q, want %q", c.name, c.in, got, c.want)
286		}
287	}
288}
289
290func TestLinkTitle(t *testing.T) {
291	cases := []struct{ in, want string }{
292		{`he said "hi"`, `he said \"hi\"`},
293		{`it's nice`, `it\'s nice`},
294		{"line1\nline2", "line1 line2"},
295	}
296	for _, c := range cases {
297		if got := LinkTitle(c.in); got != c.want {
298			t.Errorf("LinkTitle(%q) = %q, want %q", c.in, got, c.want)
299		}
300	}
301}
302
303func TestTableCell(t *testing.T) {
304	cases := []struct{ in, want string }{
305		{"plain cell", "plain cell"},
306		{"a|b", `a\|b`},
307		{"a\tb", "a b"},
308		{"a*b|c", `a\*b\|c`},
309	}
310	for _, c := range cases {
311		if got := TableCell(c.in); got != c.want {
312			t.Errorf("TableCell(%q) = %q, want %q", c.in, got, c.want)
313		}
314	}
315}
316
317func TestHTMLEscape(t *testing.T) {
318	cases := []struct{ in, want string }{
319		{"plain", "plain"},
320		{"<script>", "&lt;script&gt;"},
321		{`a & b`, "a &amp; b"},
322		{`"quoted"`, "&#34;quoted&#34;"},
323	}
324	for _, c := range cases {
325		if got := HTMLEscape(c.in); got != c.want {
326			t.Errorf("HTMLEscape(%q) = %q, want %q", c.in, got, c.want)
327		}
328	}
329}
330
331func TestURL(t *testing.T) {
332	cases := []struct{ in, want string }{
333		{"https://example.com/x", "https://example.com/x"},
334		{"http://example.com", "http://example.com"},
335		{"mailto:a@b.com", "mailto:a@b.com"},
336		{"mailto:a@b.com?body=phish", ""}, // prefill phishing rejected
337		{"//evil.com", ""},                // protocol-relative rejected
338		{"javascript:alert(1)", ""},       // bad scheme
339		{"/r/foo", "/r/foo"},              // relative
340		{"./local", "./local"},
341		{"#section", "#section"}, // fragment-only
342		{"", ""},
343		{"   ", ""},
344		{"https://a.com/path with space", "https://a.com/path%20with%20space"},
345	}
346	for _, c := range cases {
347		if got := URL(c.in); got != c.want {
348			t.Errorf("URL(%q) = %q, want %q", c.in, got, c.want)
349		}
350	}
351}
352
353func TestImageURL(t *testing.T) {
354	cases := []struct{ in, want string }{
355		{"https://example.com/img.png", "https://example.com/img.png"},
356		{"mailto:a@b.com", ""}, // mailto rejected for images
357		{"data:image/svg+xml,<svg/>", "data:image/svg+xml,%3Csvg/%3E"},
358		{"data:image/png;base64,XXX", "data:image/png;base64,XXX"},
359		{"data:text/html,<script>", ""}, // bad data subset
360		{"javascript:alert(1)", ""},
361		{"", ""},
362	}
363	for _, c := range cases {
364		if got := ImageURL(c.in); got != c.want {
365			t.Errorf("ImageURL(%q) = %q, want %q", c.in, got, c.want)
366		}
367	}
368}
369
370func TestUserName(t *testing.T) {
371	cases := []struct{ in, want string }{
372		{"alice", "alice"},
373		{"alice123", "alice123"},
374		{"alice_bob-cat", "alice_bob-cat"},
375		{"Alice", ""},  // uppercase first
376		{"1alice", ""}, // digit first
377		{"", ""},
378		{"a\u200Blice", "alice"}, // bidi stripped, then matches
379	}
380	for _, c := range cases {
381		if got := UserName(c.in); got != c.want {
382			t.Errorf("UserName(%q) = %q, want %q", c.in, got, c.want)
383		}
384	}
385}
386
387func TestBechString(t *testing.T) {
388	addrG := "g1abc123def456ghi789jkl012mno345p"
389	cases := []struct {
390		s, prefix string
391		want      string
392	}{
393		{addrG, "g", addrG},
394		{addrG, "", addrG},  // any prefix
395		{addrG, "gpub", ""}, // wrong prefix
396		{"gpub1abc123def456ghijklmn", "gpub", "gpub1abc123def456ghijklmn"},
397		{"gpub1abc123def456ghijklmn", "", "gpub1abc123def456ghijklmn"},
398		{"b1xyz789abc123def456", "", "b1xyz789abc123def456"}, // any-prefix mode allows b1...
399		{"g1ABC", "g", ""}, // uppercase rejected
400		{"x", "g", ""},
401		{"", "g", ""},
402	}
403	for _, c := range cases {
404		if got := BechString(c.s, c.prefix); got != c.want {
405			t.Errorf("BechString(%q,%q) = %q, want %q", c.s, c.prefix, got, c.want)
406		}
407	}
408}
409
410func TestFootnoteLabel(t *testing.T) {
411	cases := []struct{ in, want string }{
412		{"note1", "note1"},
413		{"Note_A-1", "Note_A-1"},
414		{"with space", ""},
415		{"", ""},
416	}
417	for _, c := range cases {
418		if got := FootnoteLabel(c.in); got != c.want {
419			t.Errorf("FootnoteLabel(%q) = %q, want %q", c.in, got, c.want)
420		}
421	}
422}
423
424func TestLanguageName(t *testing.T) {
425	cases := []struct{ in, want string }{
426		{"go", "go"},
427		{"c++", "c++"},
428		{"python3", "python3"},
429		{"objective-c", "objective-c"},
430		{"with space", ""},
431		{"", ""},
432	}
433	for _, c := range cases {
434		if got := LanguageName(c.in); got != c.want {
435			t.Errorf("LanguageName(%q) = %q, want %q", c.in, got, c.want)
436		}
437	}
438}
439
440func TestNestedPrefix(t *testing.T) {
441	cases := []struct{ in, want string }{
442		{"", ""},
443		{"  ", "  "},
444		{"\t", "\t"},
445		{"> ", "> "},
446		{"> > ", "> > "},
447		{"## ", ""}, // markdown-active prefix rejected
448		{"- ", ""},
449	}
450	for _, c := range cases {
451		if got := NestedPrefix(c.in); got != c.want {
452			t.Errorf("NestedPrefix(%q) = %q, want %q", c.in, got, c.want)
453		}
454	}
455}
456
457func TestCodeFence(t *testing.T) {
458	if got := CodeFence("```", 3); got != "````" {
459		t.Errorf("CodeFence: got %q, want %q", got, "````")
460	}
461	if got := CodeFence("", 3); got != "```" {
462		t.Errorf("CodeFence empty: got %q, want %q", got, "```")
463	}
464}