Update builder-db with content addressing

This commit is contained in:
Reynir Björnsson 2023-09-27 15:03:46 +02:00 committed by Robur
parent 9f5458c8f4
commit 36afb35e08

View file

@ -16,6 +16,13 @@ let defer_foreign_keys =
Caqti_type.unit ->. Caqti_type.unit @@ Caqti_type.unit ->. Caqti_type.unit @@
"PRAGMA defer_foreign_keys = ON" "PRAGMA defer_foreign_keys = ON"
let build_artifacts_to_orphan =
Builder_db.Rep.id `build ->* Builder_db.Rep.cstruct @@
{| SELECT a.sha256 FROM build_artifact a
WHERE a.build = ? AND
(SELECT COUNT(*) FROM build_artifact a2
WHERE a2.sha256 = a.sha256 AND a2.build <> a.build) = 0 |}
let connect uri = let connect uri =
let* (module Db : Caqti_blocking.CONNECTION) = Caqti_blocking.connect ~tweaks_version:(1,8) uri in let* (module Db : Caqti_blocking.CONNECTION) = Caqti_blocking.connect ~tweaks_version:(1,8) uri in
let* () = Db.exec defer_foreign_keys () in let* () = Db.exec defer_foreign_keys () in
@ -36,6 +43,16 @@ let do_migrate dbpath =
let migrate () dbpath = let migrate () dbpath =
or_die 1 (do_migrate dbpath) or_die 1 (do_migrate dbpath)
let artifacts_dir datadir = Fpath.(datadir / "_artifacts")
let artifact_path sha256 =
let (`Hex sha256) = Hex.of_cstruct sha256 in
(* NOTE: [sha256] is 64 characters when it's a hex sha256 checksum *)
(* NOTE: We add the prefix to reduce the number of files in a directory - a
workaround for inferior filesystems. We can easily revert this by changing
this function and adding a migration. *)
let prefix = String.sub sha256 0 2 in
Fpath.(v "_artifacts" / prefix / sha256)
let user_mod action dbpath scrypt_n scrypt_r scrypt_p username unrestricted = let user_mod action dbpath scrypt_n scrypt_r scrypt_p username unrestricted =
let scrypt_params = Builder_web_auth.scrypt_params ?scrypt_n ?scrypt_r ?scrypt_p () in let scrypt_params = Builder_web_auth.scrypt_params ?scrypt_n ?scrypt_r ?scrypt_p () in
let r = let r =
@ -154,6 +171,18 @@ let job_remove () datadir jobname =
(match Bos.OS.Dir.delete ~recurse:true dir with (match Bos.OS.Dir.delete ~recurse:true dir with
| Ok _ -> () | Ok _ -> ()
| Error `Msg e -> Logs.warn (fun m -> m "failed to remove build directory %a: %s" Fpath.pp dir e)); | Error `Msg e -> Logs.warn (fun m -> m "failed to remove build directory %a: %s" Fpath.pp dir e));
let* () =
Db.iter_s build_artifacts_to_orphan
(fun sha256 ->
let p = Fpath.(v datadir // artifact_path sha256) in
match Bos.OS.Path.delete p with
| Ok () -> Ok ()
| Error `Msg e ->
Logs.warn (fun m -> m "failed to remove orphan artifact %a: %s"
Fpath.pp p e);
Ok ())
build_id
in
let* () = Db.exec Builder_db.Build_artifact.remove_by_build build_id in let* () = Db.exec Builder_db.Build_artifact.remove_by_build build_id in
Db.exec Builder_db.Build.remove build_id) Db.exec Builder_db.Build.remove build_id)
(Ok ()) (Ok ())
@ -214,12 +243,12 @@ let num_build_artifacts =
Caqti_type.unit ->! Caqti_type.int @@ Caqti_type.unit ->! Caqti_type.int @@
"SELECT count(*) FROM build_artifact" "SELECT count(*) FROM build_artifact"
let build_artifacts : (unit, string * Uuidm.t * (Fpath.t * Fpath.t * Cstruct.t * int64), [ `One | `Zero | `Many ]) Caqti_request.t = let build_artifacts : (unit, string * Uuidm.t * (Fpath.t * Cstruct.t * int64), [ `One | `Zero | `Many ]) Caqti_request.t =
Caqti_type.unit ->* Caqti_type.unit ->*
Caqti_type.(tup3 string Builder_db.Rep.uuid Caqti_type.(tup3 string Builder_db.Rep.uuid
(tup4 Builder_db.Rep.fpath Builder_db.Rep.fpath Builder_db.Rep.cstruct int64)) (tup3 Builder_db.Rep.fpath Builder_db.Rep.cstruct int64))
@@ @@
{| SELECT job.name, b.uuid, a.filepath, a.localpath, a.sha256, a.size {| SELECT job.name, b.uuid, a.filepath, a.sha256, a.size
FROM build_artifact a, build b, job FROM build_artifact a, build b, job
WHERE a.build = b.id AND b.job = job.id |} WHERE a.build = b.id AND b.job = job.id |}
@ -257,35 +286,29 @@ let verify_data_dir () datadir =
let idx = ref 0 in let idx = ref 0 in
fun () -> incr idx; if !idx mod 100 = 0 then Logs.info (fun m -> m "%d" !idx); fun () -> incr idx; if !idx mod 100 = 0 then Logs.info (fun m -> m "%d" !idx);
in in
let verify_job_and_uuid ?fpath job uuid path = let verify_job_and_uuid job uuid path =
match Fpath.segs path with match Fpath.segs path with
| job' :: uuid' :: tl -> | job' :: uuid' :: _tl ->
if String.equal job job' then () else Logs.warn (fun m -> m "job names do not match: %s vs %s" job job'); if String.equal job job' then () else Logs.warn (fun m -> m "job names do not match: %s vs %s" job job');
if String.equal (Uuidm.to_string uuid) uuid' then () else Logs.warn (fun m -> m "uuid does not match: %s vs %s" (Uuidm.to_string uuid) uuid'); if String.equal (Uuidm.to_string uuid) uuid' then () else Logs.warn (fun m -> m "uuid does not match: %s vs %s" (Uuidm.to_string uuid) uuid');
(match fpath, tl with
| None, _ -> ()
| Some f, "output" :: tl ->
if Fpath.equal (Fpath.v (String.concat "/" tl)) f then
()
else
Logs.err (fun m -> m "path (%a) and fpath (%a) do not match" Fpath.pp path Fpath.pp f)
| Some _, _ ->
Logs.err (fun m -> m "path is not of form <job>/<uuid>/output/<filename>: %a" Fpath.pp path))
| _ -> Logs.err (fun m -> m "path is not of form <job>/<uuid>/...: %a" Fpath.pp path) | _ -> Logs.err (fun m -> m "path is not of form <job>/<uuid>/...: %a" Fpath.pp path)
in in
let* () = let* () =
Db.iter_s build_artifacts (fun (job, uuid, (fpath, lpath, sha, size)) -> Db.iter_s build_artifacts (fun (_job, _uuid, (_fpath, sha256, size)) ->
progress (); progress ();
verify_job_and_uuid ~fpath job uuid lpath; let abs_path = Fpath.(v datadir // artifact_path sha256) in
let abs_path = Fpath.(v datadir // lpath) in
(match Bos.OS.File.read abs_path with (match Bos.OS.File.read abs_path with
| Error (`Msg msg) -> Logs.err (fun m -> m "file %a not present: %s" Fpath.pp abs_path msg) | Error (`Msg msg) -> Logs.err (fun m -> m "file %a not present: %s" Fpath.pp abs_path msg)
| Ok data -> | Ok data ->
files_tracked := FpathSet.add lpath !files_tracked; files_tracked := FpathSet.add (artifact_path sha256) !files_tracked;
let s = Int64.of_int (String.length data) in let s = Int64.of_int (String.length data) in
if s <> size then Logs.err (fun m -> m "File %a has different size (in DB %Lu on disk %Lu)" Fpath.pp abs_path size s); if s <> size then Logs.err (fun m -> m "File %a has different size (in DB %Lu on disk %Lu)" Fpath.pp abs_path size s);
let sh = Mirage_crypto.Hash.SHA256.digest (Cstruct.of_string data) in let sha256' = Mirage_crypto.Hash.SHA256.digest (Cstruct.of_string data) in
if not (Cstruct.equal sha sh) then Logs.err (fun m -> m "File %a has different hash (in DB %a on disk %a" Fpath.pp abs_path Cstruct.hexdump_pp sha Cstruct.hexdump_pp sh)) ; if not (Cstruct.equal sha256 sha256') then
Logs.err (fun m -> m "File %a has different hash (in DB %a on disk %a"
Fpath.pp abs_path
Hex.pp (Hex.of_cstruct sha256)
Hex.pp (Hex.of_cstruct sha256'))) ;
Ok () Ok ()
) () ) ()
in in
@ -460,7 +483,7 @@ module Verify_cache_dir = struct
AND ba_opam_switch.filepath = 'opam-switch' AND ba_opam_switch.filepath = 'opam-switch'
LEFT JOIN build_artifact AS ba_debug_bin ON LEFT JOIN build_artifact AS ba_debug_bin ON
ba_debug_bin.build = b.id ba_debug_bin.build = b.id
AND ba_debug_bin.localpath LIKE '%.debug' AND ba_debug_bin.filepath LIKE '%.debug'
|} |}
let check_viz_nonempty ~cachedir ~viz_typ ~hash = let check_viz_nonempty ~cachedir ~viz_typ ~hash =
@ -681,9 +704,9 @@ let extract_full () datadir dest uuid =
let out = console_of_string console in let out = console_of_string console in
let* artifacts = Db.collect_list Builder_db.Build_artifact.get_all_by_build build_id in let* artifacts = Db.collect_list Builder_db.Build_artifact.get_all_by_build build_id in
let* data = let* data =
List.fold_left (fun acc (_, { Builder_db.filepath; localpath; _ }) -> List.fold_left (fun acc (_, { Builder_db.filepath; sha256; _ }) ->
let* acc = acc in let* acc = acc in
let* data = Bos.OS.File.read Fpath.(v datadir // localpath) in let* data = Bos.OS.File.read Fpath.(v datadir // artifact_path sha256) in
Ok ((filepath, data) :: acc)) Ok ((filepath, data) :: acc))
(Ok []) (Ok [])
artifacts artifacts