From 071183ff6c6001f8a75ac4e541e9e2d2ba831e2b Mon Sep 17 00:00:00 2001 From: dinosaure Date: Tue, 21 Jun 2022 15:17:50 +0000 Subject: [PATCH] Add GZip support when we generate a tar archive (#119) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #116. Co-authored-by: Romain Calascibetta Co-authored-by: Reynir Björnsson Reviewed-on: https://git.robur.io/robur/builder-web/pulls/119 Co-authored-by: dinosaure Co-committed-by: dinosaure --- builder-web.opam | 1 + lib/builder_web.ml | 8 ++--- lib/dream_tar.ml | 78 ++++++++++++++++++++++++++++++++++++---------- lib/dune | 2 ++ 4 files changed, 68 insertions(+), 21 deletions(-) diff --git a/builder-web.opam b/builder-web.opam index 1e4db61..28d2649 100644 --- a/builder-web.opam +++ b/builder-web.opam @@ -52,6 +52,7 @@ depends: [ "tar" "owee" "solo5-elftool" + "decompress" ] synopsis: "Web interface for builder" diff --git a/lib/builder_web.ml b/lib/builder_web.ml index 8f6ef06..e4d9487 100644 --- a/lib/builder_web.ml +++ b/lib/builder_web.ml @@ -432,7 +432,7 @@ let routes ~datadir ~cachedir ~configdir = |> string_of_html |> Dream.html |> Lwt_result.ok in - let job_build_tar req = + let job_build_targz req = let _job_name = Dream.param req "job" and build = Dream.param req "build" in get_uuid build >>= fun build -> @@ -443,8 +443,8 @@ let routes ~datadir ~cachedir ~configdir = Ptime.diff build.finish Ptime.epoch |> Ptime.Span.to_int_s |> Option.to_result ~none:(`Msg "bad finish time") |> Result.map Int64.of_int |> Lwt.return |> if_error "Internal server error" >>= fun finish -> - Dream.stream ~headers:["Content-Type", "application/x-tar"] - (Dream_tar.tar_response datadir finish artifacts) + Dream.stream ~headers:["Content-Type", "application/tar+gzip"] + (Dream_tar.targz_response datadir finish artifacts) |> Lwt_result.ok in @@ -605,7 +605,7 @@ let routes ~datadir ~cachedir ~configdir = Dream.get "/job/:job/build/:build/script" (w (job_build_static_file `Script)); Dream.get "/job/:job/build/:build/console" (w (job_build_static_file `Console)); Dream.get "/failed-builds" (w failed_builds); - Dream.get "/job/:job/build/:build/all.tar" (w job_build_tar); + Dream.get "/job/:job/build/:build/all.tar.gz" (w job_build_targz); Dream.get "/hash" (w hash); Dream.get "/compare/:build_left/:build_right" (w compare_builds); Dream.post "/upload" (Authorization.authenticate (w upload)); diff --git a/lib/dream_tar.ml b/lib/dream_tar.ml index 3588700..fc58812 100644 --- a/lib/dream_tar.ml +++ b/lib/dream_tar.ml @@ -1,29 +1,47 @@ open Lwt.Infix module Writer = struct - type out_channel = Dream.stream + type out_channel = + { mutable gz : Gz.Def.encoder + ; ic : Cstruct.t + ; oc : Cstruct.t + ; stream : Dream.stream } + type 'a t = 'a Lwt.t - let really_write stream cs = - Dream.write stream (Cstruct.to_string cs) + + let really_write ({ oc; stream; _ } as state) cs = + let rec until_await gz = + match Gz.Def.encode gz with + | `Await gz -> state.gz <- gz ; Lwt.return_unit + | `Flush gz -> + let max = Cstruct.length oc - Gz.Def.dst_rem gz in + let str = Cstruct.copy oc 0 max in + Dream.write stream str >>= fun () -> + let { Cstruct.buffer; off= cs_off; len= cs_len; } = oc in + until_await (Gz.Def.dst gz buffer cs_off cs_len) + | `End _gz -> assert false in + if Cstruct.length cs = 0 + then Lwt.return_unit + else ( let { Cstruct.buffer; off; len; } = cs in + let gz = Gz.Def.src state.gz buffer off len in + until_await gz ) end module HW = Tar.HeaderWriter(Lwt)(Writer) -let write_block (header : Tar.Header.t) lpath stream = - HW.write ~level:Tar.Header.Ustar header stream >>= fun () -> +let write_block (header : Tar.Header.t) lpath ({ Writer.ic= buf; _ } as state) = + HW.write ~level:Tar.Header.Ustar header state >>= fun () -> Lwt_io.open_file ~mode:Lwt_io.Input (Fpath.to_string lpath) >>= fun ic -> - let buf_len = 4 * 1024 * 1024 in - let buf = Bytes.create buf_len in let rec loop () = - Lwt_io.read_into ic buf 0 buf_len >>= fun r -> - if r = 0 then - Lwt.return_unit - else - Dream.write stream (Bytes.sub_string buf 0 r) >>= fun () -> + let { Cstruct.buffer; off; len; } = buf in + Lwt_io.read_into_bigstring ic buffer off len >>= function + | 0 -> Lwt.return () + | len' -> + Writer.really_write state (Cstruct.sub buf 0 len') >>= fun () -> loop () in loop () >>= fun () -> - Dream.write stream (Cstruct.to_string (Tar.Header.zero_padding header)) + Writer.really_write state (Tar.Header.zero_padding header) let header_of_file mod_time (file : Builder_db.file) = let file_mode = if Fpath.is_prefix Fpath.(v "bin/") file.filepath then @@ -33,12 +51,38 @@ let header_of_file mod_time (file : Builder_db.file) = in Tar.Header.make ~file_mode ~mod_time (Fpath.to_string file.filepath) (Int64.of_int file.size) -let tar_response datadir finish (files : Builder_db.file list) (stream : Dream.stream) = +let targz_response datadir finish (files : Builder_db.file list) (stream : Dream.stream) = + let state = + let ic = Cstruct.create (4 * 4 * 1024) in + let oc = Cstruct.create 4096 in + let gz = + let w = De.Lz77.make_window ~bits:15 in + let q = De.Queue.create 0x1000 in + let mtime = Int32.of_float (Unix.gettimeofday ()) in + let gz = Gz.Def.encoder `Manual `Manual ~mtime Gz.Unix ~q ~w ~level:4 in + let { Cstruct.buffer; off; len; } = oc in + Gz.Def.dst gz buffer off len + in + { Writer.gz; ic; oc; stream; } + in Lwt_list.iter_s (fun file -> let hdr = header_of_file finish file in - write_block hdr Fpath.(datadir // file.localpath) stream) + write_block hdr Fpath.(datadir // file.localpath) state) files >>= fun () -> - Writer.really_write stream Tar.Header.zero_block >>= fun () -> - Writer.really_write stream Tar.Header.zero_block >>= fun () -> + Writer.really_write state Tar.Header.zero_block >>= fun () -> + Writer.really_write state Tar.Header.zero_block >>= fun () -> + (* assert (Gz.Def.encode gz = `Await) *) + let rec until_end gz = match Gz.Def.encode gz with + | `Await _gz -> assert false + | `Flush gz | `End gz as flush_or_end -> + let max = Cstruct.length state.oc - Gz.Def.dst_rem gz in + let str = Cstruct.copy state.oc 0 max in + Dream.write stream str >>= fun () -> match flush_or_end with + | `Flush gz -> + let { Cstruct.buffer; off= cs_off; len= cs_len; } = state.oc in + until_end (Gz.Def.dst gz buffer cs_off cs_len) + | `End _ -> Lwt.return_unit + in + until_end (Gz.Def.src state.gz De.bigstring_empty 0 0) >>= fun () -> Dream.flush stream >>= fun () -> Dream.close stream diff --git a/lib/dune b/lib/dune index 243595a..c0fab85 100644 --- a/lib/dune +++ b/lib/dune @@ -15,5 +15,7 @@ tar owee solo5-elftool + decompress.de + decompress.gz uri ))