From c1cfe52cb0e41017a11bb965643c3d92e3292f42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reynir=20Bj=C3=B6rnsson?= Date: Wed, 14 Aug 2024 16:13:35 +0200 Subject: [PATCH] Update to tar 3.0.0 Also embed a comment in the archive what software produced it. --- builder-web.opam | 3 +- lib/builder_web.ml | 8 ++- lib/dream_tar.ml | 144 +++++++++++++++++++++++---------------------- lib/dune | 2 +- 4 files changed, 84 insertions(+), 73 deletions(-) diff --git a/builder-web.opam b/builder-web.opam index 4b001cb..e457876 100644 --- a/builder-web.opam +++ b/builder-web.opam @@ -46,7 +46,8 @@ depends: [ "uri" "fmt" {>= "0.8.7"} "cmarkit" {>= "0.3.0"} - "tar" {< "3.0.0"} + "tar" {>= "3.0.0"} + "tar-unix" {>= "3.0.0"} "owee" "solo5-elftool" {>= "0.3.0"} "decompress" {>= "1.5.0"} diff --git a/lib/builder_web.ml b/lib/builder_web.ml index 54f78a4..588b03a 100644 --- a/lib/builder_web.ml +++ b/lib/builder_web.ml @@ -486,7 +486,13 @@ let routes ~datadir ~cachedir ~configdir ~expired_jobs = |> Option.to_result ~none:(`Msg "bad finish time") |> Result.map Int64.of_int |> Lwt.return |> if_error "Internal server error" >>= fun finish -> Dream.stream ~headers:["Content-Type", "application/tar+gzip"] - (Dream_tar.targz_response datadir finish artifacts) + (fun stream -> + let+ r = Dream_tar.targz_response datadir finish artifacts stream in + match r with + | Ok () -> () + | Error _ -> + Log.warn (fun m -> m "error assembling gzipped tar archive"); + ()) |> Lwt_result.ok in diff --git a/lib/dream_tar.ml b/lib/dream_tar.ml index 2cad5f6..ef11a34 100644 --- a/lib/dream_tar.ml +++ b/lib/dream_tar.ml @@ -1,47 +1,36 @@ -open Lwt.Infix +module High : sig + type t + type 'a s = 'a Lwt.t -module Writer = struct - type out_channel = - { mutable gz : Gz.Def.encoder - ; ic : Cstruct.t - ; oc : Cstruct.t - ; stream : Dream.stream } + external inj : 'a s -> ('a, t) Tar.io = "%identity" + external prj : ('a, t) Tar.io -> 'a s = "%identity" +end = struct + type t + type 'a s = 'a Lwt.t - type 'a t = 'a Lwt.t - - let really_write ({ oc; stream; _ } as state) cs = - let rec until_await gz = - match Gz.Def.encode gz with - | `Await gz -> state.gz <- gz ; Lwt.return_unit - | `Flush gz -> - let max = Cstruct.length oc - Gz.Def.dst_rem gz in - let str = Cstruct.to_string ~len:max oc in - Dream.write stream str >>= fun () -> - let { Cstruct.buffer; off= cs_off; len= cs_len; } = oc in - until_await (Gz.Def.dst gz buffer cs_off cs_len) - | `End _gz -> assert false in - if Cstruct.length cs = 0 - then Lwt.return_unit - else ( let { Cstruct.buffer; off; len; } = cs in - let gz = Gz.Def.src state.gz buffer off len in - until_await gz ) + external inj : 'a -> 'b = "%identity" + external prj : 'a -> 'b = "%identity" end -module HW = Tar.HeaderWriter(Lwt)(Writer) +let value v = Tar.High (High.inj v) -let write_block (header : Tar.Header.t) lpath ({ Writer.ic= buf; _ } as state) = - HW.write ~level:Tar.Header.Ustar header state >>= fun () -> - Lwt_io.open_file ~mode:Lwt_io.Input (Fpath.to_string lpath) >>= fun ic -> - let rec loop () = - let { Cstruct.buffer; off; len; } = buf in - Lwt_io.read_into_bigstring ic buffer off len >>= function - | 0 -> Lwt.return () - | len' -> - Writer.really_write state (Cstruct.sub buf 0 len') >>= fun () -> - loop () +let ok_value v = value (Lwt_result.ok v) + +let run t stream = + let rec run : type a. (a, 'err, High.t) Tar.t -> (a, 'err) result Lwt.t = + function + | Tar.Write str -> + (* Can this not fail?!? Obviously, it can, but we never know?? *) + Lwt_result.ok (Dream.write stream str) + | Tar.Seek _ | Tar.Read _ | Tar.Really_read _ -> assert false + | Tar.Return value -> Lwt.return value + | Tar.High value -> High.prj value + | Tar.Bind (x, f) -> + let open Lwt_result.Syntax in + let* v = run x in + run (f v) in - loop () >>= fun () -> - Writer.really_write state (Tar.Header.zero_padding header) + run t let header_of_file mod_time (file : Builder_db.file) = let file_mode = if Fpath.is_prefix Fpath.(v "bin/") file.filepath then @@ -51,38 +40,53 @@ let header_of_file mod_time (file : Builder_db.file) = in Tar.Header.make ~file_mode ~mod_time (Fpath.to_string file.filepath) (Int64.of_int file.size) -let targz_response datadir finish (files : Builder_db.file list) (stream : Dream.stream) = - let state = - let ic = Cstruct.create (4 * 4 * 1024) in - let oc = Cstruct.create 4096 in - let gz = - let w = De.Lz77.make_window ~bits:15 in - let q = De.Queue.create 0x1000 in - let mtime = Int32.of_float (Unix.gettimeofday ()) in - let gz = Gz.Def.encoder `Manual `Manual ~mtime Gz.Unix ~q ~w ~level:4 in - let { Cstruct.buffer; off; len; } = oc in - Gz.Def.dst gz buffer off len +let contents datadir file : unit -> (string option, _, _) Tar.t = + let state = ref `Initial in + let dispenser () = + let ( let* ) = Tar.( let* ) in + let src = Fpath.append datadir (Model.artifact_path file) in + let* state' = + match !state with + | `Initial -> + let* fd = ok_value (Lwt_io.open_file ~mode:Lwt_io.Input (Fpath.to_string src)) in + let s = `Active fd in + state := s; Tar.return (Ok s) + | `Active _ | `Closed as s -> Tar.return (Ok s) in - { Writer.gz; ic; oc; stream; } + match state' with + | `Closed -> Tar.return (Ok None) + | `Active fd -> + let* data = ok_value (Lwt_io.read ~count:65536 fd) in + if String.length data = 0 then begin + state := `Closed; + let* () = ok_value (Lwt_io.close fd) in + Tar.return (Ok None) + end else + Tar.return (Ok (Some data)) in - Lwt_list.iter_s (fun file -> - let hdr = header_of_file finish file in - write_block hdr Fpath.(datadir // Model.artifact_path file) state) - files >>= fun () -> - Writer.really_write state Tar.Header.zero_block >>= fun () -> - Writer.really_write state Tar.Header.zero_block >>= fun () -> - (* assert (Gz.Def.encode gz = `Await) *) - let rec until_end gz = match Gz.Def.encode gz with - | `Await _gz -> assert false - | `Flush gz | `End gz as flush_or_end -> - let max = Cstruct.length state.oc - Gz.Def.dst_rem gz in - let str = Cstruct.to_string ~len:max state.oc in - Dream.write stream str >>= fun () -> match flush_or_end with - | `Flush gz -> - let { Cstruct.buffer; off= cs_off; len= cs_len; } = state.oc in - until_end (Gz.Def.dst gz buffer cs_off cs_len) - | `End _ -> Lwt.return_unit + dispenser + +let entries datadir finish files = + let files = + List.map (fun file -> + let hdr = header_of_file finish file in + let level = Some Tar.Header.Posix in + (level, hdr, contents datadir file) + ) + files in - until_end (Gz.Def.src state.gz De.bigstring_empty 0 0) >>= fun () -> - Dream.flush stream >>= fun () -> - Dream.close stream + let files = ref files in + fun () -> match !files with + | [] -> Tar.return (Ok None) + | f :: fs -> files := fs; Tar.return (Ok (Some f)) + +let targz_response datadir finish files stream = + let entries : (_, _) Tar.entries = entries datadir finish files in + let global_hdr = + Tar.Header.Extended.make + ~comment:"Tar file produced by builder-web.%%VERSION_NUM%%" + () + in + let finish32 = Int64.to_int32 finish in + Logs.err (fun m -> m "finished at %ld (%Ld)" finish32 finish); + run (Tar_gz.out_gzipped ~level:9 ~mtime:finish32 Gz.Unix (Tar.out ~global_hdr entries)) stream diff --git a/lib/dune b/lib/dune index dd075a5..339158a 100644 --- a/lib/dune +++ b/lib/dune @@ -1,5 +1,5 @@ (library (name builder_web) (libraries builder builder_db dream tyxml bos duration hex caqti-lwt - opamdiff ptime.clock.os cmarkit tar owee solo5-elftool decompress.de + opamdiff ptime.clock.os cmarkit tar tar.gz tar-unix owee solo5-elftool decompress.de decompress.gz uri))