markdown sanitization, addresses issue #46

This commit is contained in:
Robur 2021-12-01 16:21:12 +00:00
parent 68237ef382
commit 0afec1617b
3 changed files with 187 additions and 3 deletions

View file

@ -222,7 +222,82 @@ let builder section_job_map =
a ~a:[a_href "/failed-builds/"]
[txt "here"];
txt "."
]])
]])
let safe_omd omd =
let rec safe_block = function
| Omd.Paragraph (attr, inline) ->
safe_inline inline
|> Option.map (fun inline -> Omd.Paragraph (attr, inline))
| Omd.List (attr, typ, spacing, blocks) ->
let blocks = List.filter_map (fun b ->
let b = List.filter_map safe_block b in
if b = [] then None else Some b)
blocks
in
if blocks = [] then None else
Some (Omd.List (attr, typ, spacing, blocks))
| Omd.Blockquote (attr, blocks) ->
let blocks = List.filter_map safe_block blocks in
if blocks = [] then None else
Some (Omd.Blockquote (attr, blocks))
| Omd.Heading (attr, level, inline) ->
safe_inline inline
|> Option.map (fun inline -> Omd.Heading (attr, level, inline))
| Omd.Html_block _ -> None
| Omd.Definition_list (attr, def_elts) ->
let def_elts = List.filter_map safe_def_elts def_elts in
if def_elts = [] then None else
Some (Omd.Definition_list (attr, def_elts))
| Omd.Code_block _
| Omd.Thematic_break _ as v -> Some v
and safe_def_elts { term ; defs } =
let defs = List.filter_map safe_inline defs in
safe_inline term
|> Option.map (fun term -> { Omd.term ; defs })
and safe_inline = function
| Concat (attr, inline) ->
Some (Concat (attr, List.filter_map safe_inline inline))
| Emph (attr, inline) ->
safe_inline inline
|> Option.map (fun inline -> Omd.Emph (attr, inline))
| Strong (attr, inline) ->
safe_inline inline
|> Option.map (fun inline -> Omd.Strong (attr, inline))
| Link (attr, link) ->
begin match safe_link link with
| `No_label | `Relative -> safe_inline link.Omd.label
| `Link l -> Some (Omd.Link (attr, l))
end
| Image (attr, link) ->
begin match safe_link link with
| `No_label | `Relative -> None
| `Link l -> Some (Omd.Image (attr, l))
end
| Html _ -> None
| Text _
| Code _
| Hard_break _
| Soft_break _ as v -> Some v
and safe_link ({ label ; destination ; _ } as l) =
let absolute_link =
String.(length destination >= 2 && equal (sub destination 0 2) "//") ||
String.(length destination >= 7 && equal (sub destination 0 7) "http://") ||
String.(length destination >= 8 && equal (sub destination 0 8) "https://")
in
if absolute_link then
match safe_inline label with
| None -> `No_label
| Some label -> `Link { l with label }
else
`Relative
in
List.filter_map safe_block omd
let markdown_to_html data =
let omd = Omd.of_string data in
let omd = safe_omd omd in
Omd.to_html omd
let job ~failed name platform readme builds =
layout ~nav:(`Job (name, platform)) ~title:(Fmt.str "Job %s %a" name pp_platform platform)
@ -233,7 +308,7 @@ let job ~failed name platform readme builds =
[
h2 ~a:[a_id "readme"] [txt "README"];
a ~a:[a_href "#builds"] [txt "Skip to builds"];
Unsafe.data Omd.(to_html (of_string data))
Unsafe.data (markdown_to_html data)
])) @
[
h2 ~a:[a_id "builds"] [txt "Builds"];
@ -277,7 +352,7 @@ let job_build
[
h2 ~a:[a_id "readme"] [txt "README"];
a ~a:[a_href "#build"] [txt "Skip to build"];
Unsafe.data Omd.(to_html (of_string data))
Unsafe.data (markdown_to_html data)
])) @
[
h2 ~a:[a_id "build"] [txtf "Build %a" pp_ptime start];

View file

@ -1,3 +1,9 @@
(test
(name test_builder_db)
(modules test_builder_db)
(libraries builder_db caqti.blocking alcotest mirage-crypto-rng.unix))
(test
(name markdown_to_html)
(modules markdown_to_html)
(libraries builder_web alcotest))

103
test/markdown_to_html.ml Normal file
View file

@ -0,0 +1,103 @@
let markdown_to_html = Builder_web__Views.markdown_to_html
let test_simple () =
let markdown = {|# Hello world|} in
let html = markdown_to_html markdown in
Alcotest.(check string "simple html" "<h1>Hello world</h1>\n" html)
let test_html_script () =
let markdown = {|# <script>Hello world</script>|} in
let html = markdown_to_html markdown in
Alcotest.(check string "html script header" "<h1>Hello world</h1>\n" html)
let test_preserve_span_content () =
let markdown = {|* <span id="myref">My ref</span>
* [See my ref](#myref) for more information|} in
let html = markdown_to_html markdown in
Alcotest.(check string "html span content preserved"
{|<ul>
<li>My ref
</li>
<li>See my ref for more information
</li>
</ul>
|}
html)
let test_remove_script () =
let markdown = {|<script>alert(1);</script>|} in
let html = markdown_to_html markdown in
Alcotest.(check string "html script removed" "" html)
let test_list_with_html_block_and_markdown () =
let markdown = "* <div> Hello, World!</div> *this is not html*" in
let html = markdown_to_html markdown in
Alcotest.(check string "list with html block and markdown"
(*"<ul>\n<li><em>this is not html</em>\n</li>\n</ul>\n"*) ""
html)
let test_list_with_inline_html_and_markdown () =
let markdown = "* <span> Hello, World!</span> *this is not html*" in
let html = markdown_to_html markdown in
Alcotest.(check string "list with html block and markdown"
"<ul>\n<li> Hello, World! <em>this is not html</em>\n</li>\n</ul>\n"
html)
let test_absolute_link () =
let markdown = "[foo](https://foo.com)" in
let html = markdown_to_html markdown in
Alcotest.(check string "absolute link" "<p><a href=\"https://foo.com\">foo</a></p>\n" html)
let test_relative_link () =
let markdown = "[foo](../foo.jpg)" in
let html = markdown_to_html markdown in
Alcotest.(check string "relative link" "<p>foo</p>\n" html)
let test_absolute_image () =
let markdown = "![alttext](https://foo.com/bar.jpg)" in
let html = markdown_to_html markdown in
Alcotest.(check string "absolute image"
"<p><img src=\"https://foo.com/bar.jpg\" alt=\"alttext\" /></p>\n" html)
let test_absolute_image_no_alt () =
let markdown = "![](https://foo.com/bar.jpg)" in
let html = markdown_to_html markdown in
Alcotest.(check string "absolute image"
"<p><img src=\"https://foo.com/bar.jpg\" alt=\"\" /></p>\n" html)
let test_relative_image () =
let markdown = "![](/bar.jpg)" in
let html = markdown_to_html markdown in
Alcotest.(check string "relative image" "" html)
let test_absolute_image_script_alt () =
let markdown = "![<script src=\"bla.js\"></script>](https://foo.com/bar.jpg)" in
let html = markdown_to_html markdown in
Alcotest.(check string "absolute image with script alt text"
"<p><img src=\"https://foo.com/bar.jpg\" alt=\"\" /></p>\n" html)
let test_fragment_link () =
let markdown = "[fragment](#fragment)" in
let html = markdown_to_html markdown in
Alcotest.(check string "fragment link" "<p>fragment</p>\n" html)
let markdown_tests = [
Alcotest.test_case "Simple" `Quick test_simple;
Alcotest.test_case "script header" `Quick test_html_script;
Alcotest.test_case "preserve span content" `Quick test_preserve_span_content;
Alcotest.test_case "Remove script" `Quick test_remove_script;
Alcotest.test_case "List with html block and markdown" `Quick test_list_with_html_block_and_markdown;
Alcotest.test_case "List with inline html and markdown" `Quick test_list_with_inline_html_and_markdown;
Alcotest.test_case "absolute link" `Quick test_absolute_link;
Alcotest.test_case "relative link" `Quick test_relative_link;
Alcotest.test_case "absolute image" `Quick test_absolute_image;
Alcotest.test_case "absolute image no alt" `Quick test_absolute_image_no_alt;
Alcotest.test_case "relative image" `Quick test_relative_image;
Alcotest.test_case "absolute image with script alt" `Quick test_absolute_image_script_alt;
Alcotest.test_case "fragment link" `Quick test_fragment_link;
]
let () =
Alcotest.run "Markdown to HTML" [
"markdown", markdown_tests
]