builder_db: add a verify-data-dir subcommand

This subcommand checks that all files in the build_artifact table
are present in the data directory. Also, their size and hash must
match.
This commit is contained in:
Robur 2021-07-07 13:33:26 +00:00
parent b09001916b
commit 7c4bf56da6

View file

@ -202,6 +202,43 @@ let verify_input_id () dbpath =
in in
or_die 1 r or_die 1 r
let build_artifacts : (unit, (Fpath.t * Fpath.t * Cstruct.t * int64) * [`build] Builder_db.Rep.id, [ `One | `Zero | `Many ]) Caqti_request.t =
Caqti_request.collect
Caqti_type.unit
Caqti_type.(tup2 (tup4 Builder_db.Rep.fpath Builder_db.Rep.fpath Builder_db.Rep.cstruct int64) (Builder_db.Rep.id `build))
{| SELECT filepath, localpath, sha256, size, build FROM build_artifact |}
let verify_data_dir () datadir =
let dbpath = datadir ^ "/builder.sqlite3" in
Logs.info (fun m -> m "connecting to %s" dbpath);
let r =
connect
(Uri.make ~scheme:"sqlite3" ~path:dbpath ~query:["create", ["false"]] ())
>>= fun (module Db : Caqti_blocking.CONNECTION) ->
Db.collect_list build_artifacts () >>= fun build_artifacts ->
Logs.info (fun m -> m "total: %d artifacts" (List.length build_artifacts));
List.iteri (fun idx ((fpath, lpath, sha, size), _build_id) ->
if idx mod 100 = 0 then Logs.info (fun m -> m "%d" idx);
(match Fpath.segs lpath with
| _job :: _uuid :: output :: tl ->
if output = "output" && Fpath.equal (Fpath.v (String.concat "/" tl)) fpath then
()
else
Logs.err (fun m -> m "lpath (%a) and fpath (%a) do not match" Fpath.pp lpath Fpath.pp lpath)
| _ -> Logs.err (fun m -> m "lpath is not of form <job>/<uuid>/<output>/<filename>: %a" Fpath.pp lpath));
let abs_path = Fpath.(v datadir // lpath) in
(match Bos.OS.File.read abs_path with
| Error (`Msg msg) -> Logs.err (fun m -> m "file %a not present: %s" Fpath.pp abs_path msg)
| Ok data ->
let s = Int64.of_int (String.length data) in
if s <> size then Logs.err (fun m -> m "File %a has different size (in DB %Lu on disk %Lu)" Fpath.pp abs_path size s);
let sh = Mirage_crypto.Hash.SHA256.digest (Cstruct.of_string data) in
if not (Cstruct.equal sha sh) then Logs.err (fun m -> m "File %a has different hash (in DB %a on disk %a" Fpath.pp abs_path Cstruct.hexdump_pp sha Cstruct.hexdump_pp sh))
) build_artifacts;
Ok ()
in
or_die 1 r
let help man_format cmds = function let help man_format cmds = function
| None -> `Help (man_format, None) | None -> `Help (man_format, None)
| Some cmd -> | Some cmd ->
@ -331,6 +368,11 @@ let verify_input_id_cmd =
(Cmdliner.Term.(pure verify_input_id $ setup_log $ dbpath), (Cmdliner.Term.(pure verify_input_id $ setup_log $ dbpath),
Cmdliner.Term.info ~doc "verify-input-id") Cmdliner.Term.info ~doc "verify-input-id")
let verify_data_dir_cmd =
let doc = "verify that the data directory is consistent with the build_artifact table" in
(Cmdliner.Term.(pure verify_data_dir $ setup_log $ datadir),
Cmdliner.Term.info ~doc "verify-data-dir")
let help_cmd = let help_cmd =
let topic = let topic =
let doc = "Command to get help on" in let doc = "Command to get help on" in
@ -352,5 +394,5 @@ let () =
[help_cmd; migrate_cmd; [help_cmd; migrate_cmd;
user_add_cmd; user_update_cmd; user_remove_cmd; user_list_cmd; user_disable_cmd; user_add_cmd; user_update_cmd; user_remove_cmd; user_list_cmd; user_disable_cmd;
access_add_cmd; access_remove_cmd; job_remove_cmd; access_add_cmd; access_remove_cmd; job_remove_cmd;
verify_input_id_cmd ] verify_input_id_cmd; verify_data_dir_cmd ]
|> Cmdliner.Term.exit |> Cmdliner.Term.exit