From 0afec1617ba438295e89f14294cd04e747f8a28e Mon Sep 17 00:00:00 2001 From: Robur Date: Wed, 1 Dec 2021 16:21:12 +0000 Subject: [PATCH] markdown sanitization, addresses issue #46 --- lib/views.ml | 81 ++++++++++++++++++++++++++++-- test/dune | 6 +++ test/markdown_to_html.ml | 103 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 187 insertions(+), 3 deletions(-) create mode 100644 test/markdown_to_html.ml diff --git a/lib/views.ml b/lib/views.ml index 5aece48..6e6ad4f 100644 --- a/lib/views.ml +++ b/lib/views.ml @@ -222,7 +222,82 @@ let builder section_job_map = a ~a:[a_href "/failed-builds/"] [txt "here"]; txt "." - ]]) + ]]) + +let safe_omd omd = + let rec safe_block = function + | Omd.Paragraph (attr, inline) -> + safe_inline inline + |> Option.map (fun inline -> Omd.Paragraph (attr, inline)) + | Omd.List (attr, typ, spacing, blocks) -> + let blocks = List.filter_map (fun b -> + let b = List.filter_map safe_block b in + if b = [] then None else Some b) + blocks + in + if blocks = [] then None else + Some (Omd.List (attr, typ, spacing, blocks)) + | Omd.Blockquote (attr, blocks) -> + let blocks = List.filter_map safe_block blocks in + if blocks = [] then None else + Some (Omd.Blockquote (attr, blocks)) + | Omd.Heading (attr, level, inline) -> + safe_inline inline + |> Option.map (fun inline -> Omd.Heading (attr, level, inline)) + | Omd.Html_block _ -> None + | Omd.Definition_list (attr, def_elts) -> + let def_elts = List.filter_map safe_def_elts def_elts in + if def_elts = [] then None else + Some (Omd.Definition_list (attr, def_elts)) + | Omd.Code_block _ + | Omd.Thematic_break _ as v -> Some v + and safe_def_elts { term ; defs } = + let defs = List.filter_map safe_inline defs in + safe_inline term + |> Option.map (fun term -> { Omd.term ; defs }) + and safe_inline = function + | Concat (attr, inline) -> + Some (Concat (attr, List.filter_map safe_inline inline)) + | Emph (attr, inline) -> + safe_inline inline + |> Option.map (fun inline -> Omd.Emph (attr, inline)) + | Strong (attr, inline) -> + safe_inline inline + |> Option.map (fun inline -> Omd.Strong (attr, inline)) + | Link (attr, link) -> + begin match safe_link link with + | `No_label | `Relative -> safe_inline link.Omd.label + | `Link l -> Some (Omd.Link (attr, l)) + end + | Image (attr, link) -> + begin match safe_link link with + | `No_label | `Relative -> None + | `Link l -> Some (Omd.Image (attr, l)) + end + | Html _ -> None + | Text _ + | Code _ + | Hard_break _ + | Soft_break _ as v -> Some v + and safe_link ({ label ; destination ; _ } as l) = + let absolute_link = + String.(length destination >= 2 && equal (sub destination 0 2) "//") || + String.(length destination >= 7 && equal (sub destination 0 7) "http://") || + String.(length destination >= 8 && equal (sub destination 0 8) "https://") + in + if absolute_link then + match safe_inline label with + | None -> `No_label + | Some label -> `Link { l with label } + else + `Relative + in + List.filter_map safe_block omd + +let markdown_to_html data = + let omd = Omd.of_string data in + let omd = safe_omd omd in + Omd.to_html omd let job ~failed name platform readme builds = layout ~nav:(`Job (name, platform)) ~title:(Fmt.str "Job %s %a" name pp_platform platform) @@ -233,7 +308,7 @@ let job ~failed name platform readme builds = [ h2 ~a:[a_id "readme"] [txt "README"]; a ~a:[a_href "#builds"] [txt "Skip to builds"]; - Unsafe.data Omd.(to_html (of_string data)) + Unsafe.data (markdown_to_html data) ])) @ [ h2 ~a:[a_id "builds"] [txt "Builds"]; @@ -277,7 +352,7 @@ let job_build [ h2 ~a:[a_id "readme"] [txt "README"]; a ~a:[a_href "#build"] [txt "Skip to build"]; - Unsafe.data Omd.(to_html (of_string data)) + Unsafe.data (markdown_to_html data) ])) @ [ h2 ~a:[a_id "build"] [txtf "Build %a" pp_ptime start]; diff --git a/test/dune b/test/dune index f582d42..a759c27 100644 --- a/test/dune +++ b/test/dune @@ -1,3 +1,9 @@ (test (name test_builder_db) + (modules test_builder_db) (libraries builder_db caqti.blocking alcotest mirage-crypto-rng.unix)) + +(test + (name markdown_to_html) + (modules markdown_to_html) + (libraries builder_web alcotest)) diff --git a/test/markdown_to_html.ml b/test/markdown_to_html.ml new file mode 100644 index 0000000..bfabe0f --- /dev/null +++ b/test/markdown_to_html.ml @@ -0,0 +1,103 @@ +let markdown_to_html = Builder_web__Views.markdown_to_html + +let test_simple () = + let markdown = {|# Hello world|} in + let html = markdown_to_html markdown in + Alcotest.(check string "simple html" "

Hello world

\n" html) + +let test_html_script () = + let markdown = {|# |} in + let html = markdown_to_html markdown in + Alcotest.(check string "html script header" "

Hello world

\n" html) + +let test_preserve_span_content () = + let markdown = {|* My ref +* [See my ref](#myref) for more information|} in + let html = markdown_to_html markdown in + Alcotest.(check string "html span content preserved" + {| +|} + html) + +let test_remove_script () = + let markdown = {||} in + let html = markdown_to_html markdown in + Alcotest.(check string "html script removed" "" html) + +let test_list_with_html_block_and_markdown () = + let markdown = "*
Hello, World!
*this is not html*" in + let html = markdown_to_html markdown in + Alcotest.(check string "list with html block and markdown" + (*"\n"*) "" + html) + +let test_list_with_inline_html_and_markdown () = + let markdown = "* Hello, World! *this is not html*" in + let html = markdown_to_html markdown in + Alcotest.(check string "list with html block and markdown" + "\n" + html) + +let test_absolute_link () = + let markdown = "[foo](https://foo.com)" in + let html = markdown_to_html markdown in + Alcotest.(check string "absolute link" "

foo

\n" html) + +let test_relative_link () = + let markdown = "[foo](../foo.jpg)" in + let html = markdown_to_html markdown in + Alcotest.(check string "relative link" "

foo

\n" html) + +let test_absolute_image () = + let markdown = "![alttext](https://foo.com/bar.jpg)" in + let html = markdown_to_html markdown in + Alcotest.(check string "absolute image" + "

\"alttext\"

\n" html) + +let test_absolute_image_no_alt () = + let markdown = "![](https://foo.com/bar.jpg)" in + let html = markdown_to_html markdown in + Alcotest.(check string "absolute image" + "

\"\"

\n" html) + +let test_relative_image () = + let markdown = "![](/bar.jpg)" in + let html = markdown_to_html markdown in + Alcotest.(check string "relative image" "" html) + +let test_absolute_image_script_alt () = + let markdown = "![](https://foo.com/bar.jpg)" in + let html = markdown_to_html markdown in + Alcotest.(check string "absolute image with script alt text" + "

\"\"

\n" html) + +let test_fragment_link () = + let markdown = "[fragment](#fragment)" in + let html = markdown_to_html markdown in + Alcotest.(check string "fragment link" "

fragment

\n" html) + +let markdown_tests = [ + Alcotest.test_case "Simple" `Quick test_simple; + Alcotest.test_case "script header" `Quick test_html_script; + Alcotest.test_case "preserve span content" `Quick test_preserve_span_content; + Alcotest.test_case "Remove script" `Quick test_remove_script; + Alcotest.test_case "List with html block and markdown" `Quick test_list_with_html_block_and_markdown; + Alcotest.test_case "List with inline html and markdown" `Quick test_list_with_inline_html_and_markdown; + Alcotest.test_case "absolute link" `Quick test_absolute_link; + Alcotest.test_case "relative link" `Quick test_relative_link; + Alcotest.test_case "absolute image" `Quick test_absolute_image; + Alcotest.test_case "absolute image no alt" `Quick test_absolute_image_no_alt; + Alcotest.test_case "relative image" `Quick test_relative_image; + Alcotest.test_case "absolute image with script alt" `Quick test_absolute_image_script_alt; + Alcotest.test_case "fragment link" `Quick test_fragment_link; +] + +let () = + Alcotest.run "Markdown to HTML" [ + "markdown", markdown_tests + ]