Merge pull request 'content addressing' (#174) from content-addressing into main

Reviewed-on: #174
This commit is contained in:
Hannes Mehnert 2024-01-09 15:08:10 +00:00
commit 13dd238843
15 changed files with 381 additions and 125 deletions

View file

@ -16,6 +16,13 @@ let defer_foreign_keys =
Caqti_type.unit ->. Caqti_type.unit @@
"PRAGMA defer_foreign_keys = ON"
let build_artifacts_to_orphan =
Builder_db.Rep.id `build ->* Builder_db.Rep.cstruct @@
{| SELECT a.sha256 FROM build_artifact a
WHERE a.build = ? AND
(SELECT COUNT(*) FROM build_artifact a2
WHERE a2.sha256 = a.sha256 AND a2.build <> a.build) = 0 |}
let connect uri =
let* (module Db : Caqti_blocking.CONNECTION) = Caqti_blocking.connect ~tweaks_version:(1,8) uri in
let* () = Db.exec defer_foreign_keys () in
@ -36,6 +43,16 @@ let do_migrate dbpath =
let migrate () dbpath =
or_die 1 (do_migrate dbpath)
let artifacts_dir datadir = Fpath.(datadir / "_artifacts")
let artifact_path sha256 =
let (`Hex sha256) = Hex.of_cstruct sha256 in
(* NOTE: [sha256] is 64 characters when it's a hex sha256 checksum *)
(* NOTE: We add the prefix to reduce the number of files in a directory - a
workaround for inferior filesystems. We can easily revert this by changing
this function and adding a migration. *)
let prefix = String.sub sha256 0 2 in
Fpath.(v "_artifacts" / prefix / sha256)
let user_mod action dbpath scrypt_n scrypt_r scrypt_p username unrestricted =
let scrypt_params = Builder_web_auth.scrypt_params ?scrypt_n ?scrypt_r ?scrypt_p () in
let r =
@ -154,6 +171,18 @@ let job_remove () datadir jobname =
(match Bos.OS.Dir.delete ~recurse:true dir with
| Ok _ -> ()
| Error `Msg e -> Logs.warn (fun m -> m "failed to remove build directory %a: %s" Fpath.pp dir e));
let* () =
Db.iter_s build_artifacts_to_orphan
(fun sha256 ->
let p = Fpath.(v datadir // artifact_path sha256) in
match Bos.OS.Path.delete p with
| Ok () -> Ok ()
| Error `Msg e ->
Logs.warn (fun m -> m "failed to remove orphan artifact %a: %s"
Fpath.pp p e);
Ok ())
build_id
in
let* () = Db.exec Builder_db.Build_artifact.remove_by_build build_id in
Db.exec Builder_db.Build.remove build_id)
(Ok ())
@ -214,12 +243,12 @@ let num_build_artifacts =
Caqti_type.unit ->! Caqti_type.int @@
"SELECT count(*) FROM build_artifact"
let build_artifacts : (unit, string * Uuidm.t * (Fpath.t * Fpath.t * Cstruct.t * int64), [ `One | `Zero | `Many ]) Caqti_request.t =
let build_artifacts : (unit, string * Uuidm.t * (Fpath.t * Cstruct.t * int64), [ `One | `Zero | `Many ]) Caqti_request.t =
Caqti_type.unit ->*
Caqti_type.(tup3 string Builder_db.Rep.uuid
(tup4 Builder_db.Rep.fpath Builder_db.Rep.fpath Builder_db.Rep.cstruct int64))
(tup3 Builder_db.Rep.fpath Builder_db.Rep.cstruct int64))
@@
{| SELECT job.name, b.uuid, a.filepath, a.localpath, a.sha256, a.size
{| SELECT job.name, b.uuid, a.filepath, a.sha256, a.size
FROM build_artifact a, build b, job
WHERE a.build = b.id AND b.job = job.id |}
@ -257,35 +286,32 @@ let verify_data_dir () datadir =
let idx = ref 0 in
fun () -> incr idx; if !idx mod 100 = 0 then Logs.info (fun m -> m "%d" !idx);
in
let verify_job_and_uuid ?fpath job uuid path =
let verify_job_and_uuid job uuid path =
match Fpath.segs path with
| job' :: uuid' :: tl ->
| job' :: uuid' :: _tl ->
if String.equal job job' then () else Logs.warn (fun m -> m "job names do not match: %s vs %s" job job');
if String.equal (Uuidm.to_string uuid) uuid' then () else Logs.warn (fun m -> m "uuid does not match: %s vs %s" (Uuidm.to_string uuid) uuid');
(match fpath, tl with
| None, _ -> ()
| Some f, "output" :: tl ->
if Fpath.equal (Fpath.v (String.concat "/" tl)) f then
()
else
Logs.err (fun m -> m "path (%a) and fpath (%a) do not match" Fpath.pp path Fpath.pp f)
| Some _, _ ->
Logs.err (fun m -> m "path is not of form <job>/<uuid>/output/<filename>: %a" Fpath.pp path))
| _ -> Logs.err (fun m -> m "path is not of form <job>/<uuid>/...: %a" Fpath.pp path)
in
let* () =
Db.iter_s build_artifacts (fun (job, uuid, (fpath, lpath, sha, size)) ->
Db.iter_s build_artifacts (fun (_job, _uuid, (_fpath, sha256, size)) ->
progress ();
verify_job_and_uuid ~fpath job uuid lpath;
let abs_path = Fpath.(v datadir // lpath) in
if not (FpathSet.mem (artifact_path sha256) !files_tracked) then
let abs_path = Fpath.(v datadir // artifact_path sha256) in
(match Bos.OS.File.read abs_path with
| Error (`Msg msg) -> Logs.err (fun m -> m "file %a not present: %s" Fpath.pp abs_path msg)
| Ok data ->
files_tracked := FpathSet.add lpath !files_tracked;
files_tracked := FpathSet.add (artifact_path sha256) !files_tracked;
let s = Int64.of_int (String.length data) in
if s <> size then Logs.err (fun m -> m "File %a has different size (in DB %Lu on disk %Lu)" Fpath.pp abs_path size s);
let sh = Mirage_crypto.Hash.SHA256.digest (Cstruct.of_string data) in
if not (Cstruct.equal sha sh) then Logs.err (fun m -> m "File %a has different hash (in DB %a on disk %a" Fpath.pp abs_path Cstruct.hexdump_pp sha Cstruct.hexdump_pp sh)) ;
let sha256' = Mirage_crypto.Hash.SHA256.digest (Cstruct.of_string data) in
if not (Cstruct.equal sha256 sha256') then
Logs.err (fun m -> m "File %a has different hash (in DB %a on disk %a)"
Fpath.pp abs_path
Hex.pp (Hex.of_cstruct sha256)
Hex.pp (Hex.of_cstruct sha256'))) ;
Ok ()
else
Ok ()
) ()
in
@ -460,7 +486,7 @@ module Verify_cache_dir = struct
AND ba_opam_switch.filepath = 'opam-switch'
LEFT JOIN build_artifact AS ba_debug_bin ON
ba_debug_bin.build = b.id
AND ba_debug_bin.localpath LIKE '%.debug'
AND ba_debug_bin.filepath LIKE '%.debug'
|}
let check_viz_nonempty ~cachedir ~viz_typ ~hash =
@ -681,9 +707,9 @@ let extract_full () datadir dest uuid =
let out = console_of_string console in
let* artifacts = Db.collect_list Builder_db.Build_artifact.get_all_by_build build_id in
let* data =
List.fold_left (fun acc (_, { Builder_db.filepath; localpath; _ }) ->
List.fold_left (fun acc (_, { Builder_db.filepath; sha256; _ }) ->
let* acc = acc in
let* data = Bos.OS.File.read Fpath.(v datadir // localpath) in
let* data = Bos.OS.File.read Fpath.(v datadir // artifact_path sha256) in
Ok ((filepath, data) :: acc))
(Ok [])
artifacts

View file

@ -180,6 +180,7 @@ let () =
actions (module M20211105);
actions (module M20220509);
actions (module M20230911);
actions (module M20230914);
])
|> Cmd.eval
|> exit

162
bin/migrations/m20230914.ml Normal file
View file

@ -0,0 +1,162 @@
let new_version = 18L and old_version = 17L
and identifier = "2023-09-14"
and migrate_doc = "Artifacts are stored content-addressed in the filesystem"
and rollback_doc = "Artifacts are stored under their build's job name and uuid"
open Grej.Syntax
let new_build_artifact =
Caqti_type.unit ->. Caqti_type.unit @@
{| CREATE TABLE new_build_artifact (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
filepath TEXT NOT NULL,
sha256 BLOB NOT NULL,
size INTEGER NOT NULL,
build INTEGER NOT NULL,
FOREIGN KEY(build) REFERENCES build(id),
UNIQUE(build, filepath)
)
|}
let old_build_artifact =
Caqti_type.unit ->. Caqti_type.unit @@
{| CREATE TABLE new_build_artifact (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
filepath TEXT NOT NULL, -- the path as in the build
localpath TEXT NOT NULL, -- local path to the file on disk
sha256 BLOB NOT NULL,
size INTEGER NOT NULL,
build INTEGER NOT NULL,
FOREIGN KEY(build) REFERENCES build(id),
UNIQUE(build, filepath)
)
|}
let idx_build_artifact_sha256 =
Caqti_type.unit ->. Caqti_type.unit @@
"CREATE INDEX idx_build_artifact_sha256 ON build_artifact(sha256)"
let idx_build_artifact_build =
Caqti_type.unit ->. Caqti_type.unit @@
"CREATE INDEX idx_build_artifact_build ON build_artifact(build)"
let copy_new_build_artifact =
Caqti_type.(unit ->. unit) @@
{| INSERT INTO new_build_artifact(id, filepath, sha256, size, build)
SELECT id, filepath, sha256, size, build
FROM build_artifact
|}
let copy_old_build_artifact =
Caqti_type.(unit ->. unit) @@
{| INSERT INTO new_build_artifact(id, filepath, localpath, sha256, size, build)
SELECT a.id, a.filepath,
j.name || '/' || b.uuid || '/output/' || a.filepath,
a.sha256, a.size, a.build
FROM build_artifact a, job j, build b
WHERE b.id = a.build AND j.id = b.job
|}
let new_build_artifact_paths =
Caqti_type.unit ->* Caqti_type.(tup2 string string) @@
{| SELECT localpath, '_artifacts/' || substr(lower(hex(sha256)), 1, 2) || '/' || lower(hex(sha256))
FROM build_artifact
|}
let old_build_artifact_paths =
Caqti_type.unit ->* Caqti_type.(tup2 string string) @@
{| SELECT '_artifacts/' || substr(lower(hex(a.sha256)), 1, 2) || '/' || lower(hex(a.sha256)),
j.name || '/' || b.uuid || '/output/' || a.filepath
FROM build_artifact a, job j, build b
WHERE b.id = a.build AND j.id = b.job
|}
let drop_build_artifact =
Caqti_type.(unit ->. unit) @@
"DROP TABLE build_artifact"
let rename_build_artifact =
Caqti_type.(unit ->. unit) @@
"ALTER TABLE new_build_artifact RENAME TO build_artifact"
let move_paths ?force datadir (old_path, new_path) =
let old_path = Fpath.(datadir // v old_path) and new_path = Fpath.(datadir // v new_path) in
let* _created = Bos.OS.Dir.create (Fpath.parent new_path) in
Bos.OS.Path.move ?force old_path new_path
let copy_paths datadir (old_path, new_path) =
let old_path = Fpath.(datadir // v old_path) and new_path = Fpath.(datadir // v new_path) in
let new_path_tmp = Fpath.(new_path + "tmp") in
let* _created = Bos.OS.Dir.create (Fpath.parent new_path) in
let cmd = Bos.Cmd.(v "cp" % p old_path % p new_path_tmp) in
let* () =
match Bos.OS.Cmd.run_status cmd with
| Ok `Exited 0 ->
Ok ()
| Ok status ->
let _ = Bos.OS.Path.delete new_path_tmp in
Error (`Msg (Fmt.str "cp failed: %a" Bos.OS.Cmd.pp_status status))
| Error _ as e ->
let _ = Bos.OS.Path.delete new_path_tmp in
e
in
Bos.OS.Path.move ~force:true new_path_tmp new_path
let migrate datadir (module Db : Caqti_blocking.CONNECTION) =
let* () = Grej.check_version ~user_version:old_version (module Db) in
let* () = Db.exec new_build_artifact () in
let* () = Db.exec copy_new_build_artifact () in
let* () = Db.iter_s new_build_artifact_paths (move_paths ~force:true datadir) () in
let* () = Db.exec drop_build_artifact () in
let* () = Db.exec rename_build_artifact () in
let* () = Db.exec idx_build_artifact_sha256 () in
let* () = Db.exec idx_build_artifact_build () in
Db.exec (Grej.set_version new_version) ()
let rollback datadir (module Db : Caqti_blocking.CONNECTION) =
let* () = Grej.check_version ~user_version:new_version (module Db) in
let* () = Db.exec old_build_artifact () in
let* () = Db.exec copy_old_build_artifact () in
let* () = Db.iter_s old_build_artifact_paths (copy_paths datadir) () in
let* () =
Db.iter_s old_build_artifact_paths
(fun (old_path, _new_path) ->
Bos.OS.Path.delete Fpath.(datadir // v old_path))
()
in
let* () = Db.exec drop_build_artifact () in
let* () = Db.exec rename_build_artifact () in
let* () = Db.exec idx_build_artifact_sha256 () in
Db.exec (Grej.set_version old_version) ()
(* migration failed but managed to move *some* files *)
let fixup_migrate datadir (module Db : Caqti_blocking.CONNECTION) =
let* () = Grej.check_version ~user_version:old_version (module Db) in
let* () =
Db.iter_s new_build_artifact_paths
(fun (old_path, new_path) ->
let* old_exists = Bos.OS.Path.exists Fpath.(datadir // v old_path) in
let* new_exists = Bos.OS.Path.exists Fpath.(datadir // v new_path) in
if new_exists && not old_exists then
copy_paths datadir (new_path, old_path)
else Ok ())
()
in
Db.iter_s new_build_artifact_paths
(fun (_old_path, new_path) ->
Bos.OS.Path.delete Fpath.(datadir // v new_path))
()
(* rollback failed but some or all artifacts were copied *)
let fixup_rollback datadir (module Db : Caqti_blocking.CONNECTION) =
let* () = Grej.check_version ~user_version:new_version (module Db) in
Db.iter_s old_build_artifact_paths
(fun (old_path, new_path) ->
let* old_exists = Bos.OS.Path.exists Fpath.(datadir // v old_path) in
if old_exists then
Bos.OS.Path.delete Fpath.(datadir // v new_path)
else
move_paths datadir (new_path, old_path))
()

View file

@ -11,7 +11,6 @@ type 'a id = 'a Rep.id
type file = Rep.file = {
filepath : Fpath.t;
localpath : Fpath.t;
sha256 : Cstruct.t;
size : int;
}
@ -140,7 +139,6 @@ module Build_artifact = struct
{| CREATE TABLE build_artifact (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
filepath TEXT NOT NULL, -- the path as in the build
localpath TEXT NOT NULL, -- local path to the file on disk
sha256 BLOB NOT NULL,
size INTEGER NOT NULL,
build INTEGER NOT NULL,
@ -156,13 +154,13 @@ module Build_artifact = struct
let get =
id `build_artifact ->! file @@
{| SELECT filepath, localpath, sha256, size
{| SELECT filepath, sha256, size
FROM build_artifact WHERE id = ? |}
let get_by_build_uuid =
Caqti_type.tup2 uuid fpath ->? Caqti_type.tup2 (id `build_artifact) file @@
{| SELECT build_artifact.id, build_artifact.filepath,
build_artifact.localpath, build_artifact.sha256, build_artifact.size
build_artifact.sha256, build_artifact.size
FROM build_artifact
INNER JOIN build ON build.id = build_artifact.build
WHERE build.uuid = ? AND build_artifact.filepath = ?
@ -170,12 +168,16 @@ module Build_artifact = struct
let get_all_by_build =
id `build ->* Caqti_type.(tup2 (id `build_artifact) file) @@
"SELECT id, filepath, localpath, sha256, size FROM build_artifact WHERE build = ?"
"SELECT id, filepath, sha256, size FROM build_artifact WHERE build = ?"
let exists =
cstruct ->! Caqti_type.bool @@
"SELECT EXISTS(SELECT 1 FROM build_artifact WHERE sha256 = ?)"
let add =
Caqti_type.(tup2 file (id `build)) ->. Caqti_type.unit @@
"INSERT INTO build_artifact (filepath, localpath, sha256, size, build) \
VALUES (?, ?, ?, ?, ?)"
"INSERT INTO build_artifact (filepath, sha256, size, build) \
VALUES (?, ?, ?, ?)"
let remove_by_build =
id `build ->. Caqti_type.unit @@
@ -316,7 +318,7 @@ module Build = struct
b.uuid, b.start_d, b.start_ps, b.finish_d, b.finish_ps,
b.result_code, b.result_msg, b.console, b.script,
b.platform, b.main_binary, b.input_id, b.user, b.job,
a.filepath, a.localpath, a.sha256, a.size
a.filepath, a.sha256, a.size
FROM build b, build_artifact a
WHERE b.main_binary = a.id AND b.job = $1 AND b.platform = $2
AND b.main_binary IS NOT NULL
@ -442,7 +444,7 @@ module Build = struct
{| SELECT b.uuid, b.start_d, b.start_ps, b.finish_d, b.finish_ps,
b.result_code, b.result_msg, b.console, b.script,
b.platform, b.main_binary, b.input_id, b.user, b.job,
a.filepath, a.localpath, a.sha256, a.size
a.filepath, a.sha256, a.size
FROM build_artifact a
INNER JOIN build b ON b.id = a.build
WHERE a.sha256 = ?
@ -593,6 +595,8 @@ let migrate = [
"CREATE INDEX idx_build_main_binary ON build(main_binary)";
Caqti_type.unit ->. Caqti_type.unit @@
"CREATE INDEX idx_build_artifact_sha256 ON build_artifact(sha256)";
Caqti_type.unit ->. Caqti_type.unit @@
"CREATE INDEX idx_build_artifact_build ON build_artifact(build)";
set_current_version;
set_application_id;
]
@ -606,6 +610,8 @@ let rollback = [
Build.rollback;
Job.rollback;
Caqti_type.unit ->. Caqti_type.unit @@
"DROP INDEX IF EXISTS idx_build_artifact_build";
Caqti_type.unit ->. Caqti_type.unit @@
"DROP INDEX IF EXISTS idx_build_artifact_sha256";
Caqti_type.unit ->. Caqti_type.unit @@
"DROP INDEX IF EXISTS idx_build_failed";

View file

@ -3,7 +3,6 @@ module Rep : sig
type 'a id
type file = {
filepath : Fpath.t;
localpath : Fpath.t;
sha256 : Cstruct.t;
size : int;
}
@ -23,7 +22,6 @@ type 'a id = 'a Rep.id
type file = Rep.file = {
filepath : Fpath.t;
localpath : Fpath.t;
sha256 : Cstruct.t;
size : int;
}
@ -87,6 +85,7 @@ module Build_artifact : sig
Caqti_request.t
val get_all_by_build :
([`build] id, [`build_artifact] id * file, [ `Many | `One | `Zero ]) Caqti_request.t
val exists : (Cstruct.t, bool, [ `One ]) Caqti_request.t
val add :
(file * [`build] id, unit, [ `Zero ]) Caqti_request.t
val remove_by_build :

View file

@ -30,7 +30,6 @@ let id_to_int64 (id : 'a id) : int64 = id
type file = {
filepath : Fpath.t;
localpath : Fpath.t;
sha256 : Cstruct.t;
size : int;
}
@ -63,24 +62,24 @@ let cstruct =
Caqti_type.custom ~encode ~decode Caqti_type.octets
let file =
let encode { filepath; localpath; sha256; size } =
Ok (filepath, localpath, sha256, size) in
let decode (filepath, localpath, sha256, size) =
Ok { filepath; localpath; sha256; size } in
Caqti_type.custom ~encode ~decode Caqti_type.(tup4 fpath fpath cstruct int)
let encode { filepath; sha256; size } =
Ok (filepath, sha256, size) in
let decode (filepath, sha256, size) =
Ok { filepath; sha256; size } in
Caqti_type.custom ~encode ~decode Caqti_type.(tup3 fpath cstruct int)
let file_opt =
let rep = Caqti_type.(tup4 (option fpath) (option fpath) (option cstruct) (option int)) in
let rep = Caqti_type.(tup3 (option fpath) (option cstruct) (option int)) in
let encode = function
| Some { filepath; localpath; sha256; size } ->
Ok (Some filepath, Some localpath, Some sha256, Some size)
| Some { filepath; sha256; size } ->
Ok (Some filepath, Some sha256, Some size)
| None ->
Ok (None, None, None, None)
Ok (None, None, None)
in
let decode = function
| (Some filepath, Some localpath, Some sha256, Some size) ->
Ok (Some { filepath; localpath; sha256; size })
| (None, None, None, None) ->
| (Some filepath, Some sha256, Some size) ->
Ok (Some { filepath; sha256; size })
| (None, None, None) ->
Ok None
| _ ->
(* This should not happen if the database is well-formed *)

View file

@ -211,9 +211,9 @@ module Viz_aux = struct
match typ with
| `Treemap ->
let debug_binary =
let bin = Fpath.base main_binary.localpath in
let bin = Fpath.base main_binary.filepath in
List.find_opt
(fun p -> Fpath.(equal (bin + "debug") (base p.localpath)))
(fun p -> Fpath.(equal (bin + "debug") (base p.filepath)))
artifacts
in
begin
@ -226,7 +226,7 @@ module Viz_aux = struct
| `Dependencies ->
let opam_switch =
List.find_opt
(fun p -> Fpath.(equal (v "opam-switch") (base p.localpath)))
(fun p -> Fpath.(equal (v "opam-switch") (base p.filepath)))
artifacts
in
Model.not_found opam_switch
@ -435,8 +435,8 @@ let routes ~datadir ~cachedir ~configdir ~expired_jobs =
| _ ->
Model.build_artifact_data datadir file
|> if_error "Error getting build artifact"
~log:(fun e -> Log.warn (fun m -> m "Error getting build artifact data for file %a in %a: %a"
Fpath.pp file.Builder_db.filepath Fpath.pp file.Builder_db.localpath
~log:(fun e -> Log.warn (fun m -> m "Error getting build artifact data for file %a: %a"
Fpath.pp file.Builder_db.filepath
pp_error e)) >>= fun data ->
let headers = [
"Content-Type", mime_lookup file.Builder_db.filepath;

View file

@ -67,7 +67,7 @@ let targz_response datadir finish (files : Builder_db.file list) (stream : Dream
in
Lwt_list.iter_s (fun file ->
let hdr = header_of_file finish file in
write_block hdr Fpath.(datadir // file.localpath) state)
write_block hdr Fpath.(datadir // Model.artifact_path file) state)
files >>= fun () ->
Writer.really_write state Tar.Header.zero_block >>= fun () ->
Writer.really_write state Tar.Header.zero_block >>= fun () ->

View file

@ -19,6 +19,14 @@ let not_found = function
| Some v -> Lwt_result.return v
let staging datadir = Fpath.(datadir / "_staging")
let artifact_path artifact =
let (`Hex sha256) = Hex.of_cstruct artifact.Builder_db.sha256 in
(* NOTE: [sha256] is 64 characters when it's a hex sha256 checksum *)
(* NOTE: We add the prefix to reduce the number of files in a directory - a
workaround for inferior filesystems. We can easily revert this by changing
this function and adding a migration. *)
let prefix = String.sub sha256 0 2 in
Fpath.(v "_artifacts" / prefix / sha256)
let read_file datadir filepath =
let filepath = Fpath.(datadir // filepath) in
@ -44,14 +52,14 @@ let build_artifact_by_id id (module Db : CONN) =
Db.find Builder_db.Build_artifact.get id
let build_artifact_data datadir file =
read_file datadir file.Builder_db.localpath
read_file datadir (artifact_path file)
let build_artifacts build (module Db : CONN) =
Db.collect_list Builder_db.Build_artifact.get_all_by_build build >|=
List.map snd
let solo5_manifest datadir file =
let buf = Owee_buf.map_binary Fpath.(to_string (datadir // file.Builder_db.localpath)) in
let buf = Owee_buf.map_binary Fpath.(to_string (datadir // artifact_path file)) in
Solo5_elftool.query_manifest buf |> Result.to_option
let platforms_of_job id (module Db : CONN) =
@ -196,11 +204,11 @@ let cleanup_staging datadir (module Db : Caqti_lwt.CONNECTION) =
(cleanup_staged staged))
stageds
let save file data =
let save path data =
let open Lwt.Infix in
Lwt.catch
(fun () ->
Lwt_io.open_file ~mode:Lwt_io.Output (Fpath.to_string file) >>= fun oc ->
Lwt_io.open_file ~mode:Lwt_io.Output (Fpath.to_string path) >>= fun oc ->
Lwt_io.write oc data >>= fun () ->
Lwt_io.close oc
|> Lwt_result.ok)
@ -209,33 +217,29 @@ let save file data =
Lwt_result.fail (`Msg (Unix.error_message e))
| e -> Lwt.fail e)
let save_file dir staging (filepath, data) =
let size = String.length data in
let sha256 = Mirage_crypto.Hash.SHA256.digest (Cstruct.of_string data) in
let localpath = Fpath.append dir filepath in
let destpath = Fpath.append staging filepath in
Lwt_result.lift (Bos.OS.Dir.create (Fpath.parent destpath)) >>= fun _ ->
save destpath data >|= fun () ->
{ Builder_db.filepath; localpath; sha256; size }
let save_files dir staging files =
let save_artifacts staging artifacts =
List.fold_left
(fun r file ->
r >>= fun acc ->
save_file dir staging file >>= fun file ->
Lwt_result.return (file :: acc))
(Lwt_result.return [])
files
(fun r (file, data) ->
r >>= fun () ->
let (`Hex sha256) = Hex.of_cstruct file.Builder_db.sha256 in
let destpath = Fpath.(staging / sha256) in
save destpath data)
(Lwt_result.return ())
artifacts
let save_all staging_dir (job : Builder.script_job) uuid artifacts =
let build_dir = Fpath.(v job.Builder.name / Uuidm.to_string uuid) in
let output_dir = Fpath.(build_dir / "output")
and staging_output_dir = Fpath.(staging_dir / "output") in
Lwt.return (Bos.OS.Dir.create staging_output_dir) >>= fun _ ->
save_files output_dir staging_output_dir artifacts >>= fun artifacts ->
Lwt_result.return artifacts
let commit_files datadir staging_dir job_name uuid =
let commit_files datadir staging_dir job_name uuid artifacts =
(* First we move the artifacts *)
List.fold_left
(fun r artifact ->
r >>= fun () ->
let (`Hex sha256) = Hex.of_cstruct artifact.Builder_db.sha256 in
let src = Fpath.(staging_dir / sha256) in
let dest = Fpath.(datadir // artifact_path artifact) in
Lwt.return (Bos.OS.Dir.create (Fpath.parent dest)) >>= fun _created ->
Lwt.return (Bos.OS.Path.move ~force:true src dest))
(Lwt_result.return ())
artifacts >>= fun () ->
(* Now the staging dir only contains script & console *)
let job_dir = Fpath.(datadir / job_name) in
let dest = Fpath.(job_dir / Uuidm.to_string uuid) in
Lwt.return (Bos.OS.Dir.create job_dir) >>= fun _ ->
@ -324,6 +328,25 @@ let prepare_staging staging_dir =
then Lwt_result.fail (`Msg "build directory already exists")
else Lwt_result.return ()
(* saving:
- for each artifact compute its sha256 checksum -- calling Lwt.pause in
between
- lookup artifact sha256 in the database and filter them out of the list: not_in_db
- mkdir -p _staging/uuid/
- save console & script to _staging/uuid/
- save each artifact in not_in_db as _staging/uuid/sha256
committing:
- for each artifact mv _staging/uuid/sha256 _artifacts/sha256
(or _artifacts/prefix(sha256)/sha256 where prefix(sha256) is the first two hex digits in sha256)
- now _staging/uuid only contains console & script so we mv _staging/uuid _staging/job/uuid
potential issues:
- race condition in uploading same artifact:
* if the artifact already exists in the database and thus filesystem then nothing is done
* if the artifact is added to the database and/or filesystem we atomically overwrite it
- input_id depends on a sort order?
*)
let add_build
~datadir
~cachedir
@ -344,16 +367,35 @@ let add_build
e)
x
in
let artifacts_to_preserve =
let not_interesting p =
String.equal (Fpath.basename p) "README.md" || String.equal (Fpath.get_ext p) ".build-hashes"
in
List.filter (fun (p, _) -> not (not_interesting p)) raw_artifacts
in
begin
List.fold_left
(fun r (filepath, data) ->
r >>= fun acc ->
if not_interesting filepath then
Lwt_result.return acc
else
let sha256 = Mirage_crypto.Hash.SHA256.digest (Cstruct.of_string data)
and size = String.length data in
Lwt_result.ok (Lwt.pause ()) >|= fun () ->
({ filepath; sha256; size }, data) :: acc)
(Lwt_result.return [])
raw_artifacts
end >>= fun artifacts ->
or_cleanup (prepare_staging staging_dir) >>= fun () ->
or_cleanup (save_console_and_script staging_dir job_name uuid console job.Builder.script)
>>= fun (console, script) ->
or_cleanup (save_all staging_dir job uuid artifacts_to_preserve) >>= fun artifacts ->
List.fold_left
(fun r ((f, _) as artifact) ->
r >>= fun acc ->
Db.find Builder_db.Build_artifact.exists f.sha256 >|= fun exists ->
if exists then acc else artifact :: acc)
(Lwt_result.return [])
artifacts >>= fun artifacts_to_save ->
or_cleanup (save_artifacts staging_dir artifacts_to_save) >>= fun () ->
let artifacts = List.map fst artifacts in
let r =
Db.start () >>= fun () ->
Db.exec Job.try_add job_name >>= fun () ->
@ -422,7 +464,7 @@ let add_build
(Lwt_result.return ())
remaining_artifacts_to_add >>= fun () ->
Db.commit () >>= fun () ->
commit_files datadir staging_dir job_name uuid >|= fun () ->
commit_files datadir staging_dir job_name uuid artifacts >|= fun () ->
main_binary
in
Lwt_result.bind_lwt_error (or_cleanup r)
@ -451,7 +493,8 @@ let add_build
"--uuid=" ^ uuid ; "--platform=" ^ platform ;
"--cache-dir=" ^ Fpath.to_string cachedir ;
"--data-dir=" ^ Fpath.to_string datadir ;
fp_str main_binary.localpath ])
"--main-binary-filepath=" ^ Fpath.to_string main_binary.filepath ;
fp_str Fpath.(datadir // artifact_path main_binary) ])
in
Log.debug (fun m -> m "executing hooks with %s" args);
let dir = Fpath.(configdir / "upload-hooks") in

View file

@ -5,6 +5,7 @@ val pp_error : Format.formatter -> error -> unit
val not_found : 'a option -> ('a, [> `Not_found ]) result Lwt.t
val staging : Fpath.t -> Fpath.t
val artifact_path : Builder_db.file -> Fpath.t
val cleanup_staging : Fpath.t -> Caqti_lwt.connection ->
(unit, [> `Msg of string ]) result Lwt.t

View file

@ -188,7 +188,7 @@ let artifact
~basename
~job_name
~build
~file:{ Builder_db.filepath; localpath = _; sha256; size }
~file:{ Builder_db.filepath; sha256; size }
=
let artifact_link =
Link.Job_build_artifact.make

View file

@ -32,6 +32,8 @@ Options:
Hex encoded SHA256 digest of the main binary.
--job=STRING
Job name that was built.
--main-binary-filepath=STRING
The file path of the main binary.
EOM
exit 1
}
@ -39,6 +41,7 @@ EOM
BUILD_TIME=
SHA=
JOB=
FILEPATH=
while [ $# -gt 1 ]; do
OPT="$1"
@ -53,6 +56,9 @@ while [ $# -gt 1 ]; do
--job=*)
JOB="${OPT##*=}"
;;
--main-binary-filepath=*)
FILEPATH="${OPT##*=}"
;;
--*)
warn "Ignoring unknown option: '${OPT}'"
;;
@ -67,13 +73,14 @@ done
[ -z "${BUILD_TIME}" ] && die "The --build-time option must be specified"
[ -z "${SHA}" ] && die "The --sha256 option must be specified"
[ -z "${JOB}" ] && die "The --job option must be specified"
[ -z "${FILEPATH}" ] && die "The --main-binary-filepath option must be specified"
FILENAME="${1}"
: "${REPO:="/usr/local/www/pkg"}"
: "${REPO_KEY:="/usr/local/etc/builder-web/repo.key"}"
if [ "$(basename "${FILENAME}" .pkg)" = "$(basename "${FILENAME}")" ]; then
if [ "$(basename "${FILEPATH}" .pkg)" = "$(basename "${FILEPATH}")" ]; then
echo "Not a FreeBSD package"
exit 0
fi

View file

@ -36,6 +36,8 @@ Options:
Job name that was built.
--platform=STRING
Platform name on which the build was performed.
--main-binary-filepath=STRING
The file path of the main binary.
EOM
exit 1
}
@ -44,6 +46,7 @@ BUILD_TIME=
SHA=
JOB=
PLATFORM=
FILEPATH=
while [ $# -gt 1 ]; do
OPT="$1"
@ -61,6 +64,9 @@ while [ $# -gt 1 ]; do
--platform=*)
PLATFORM="${OPT##*=}"
;;
--main-binary-filepath=*)
FILEPATH="${OPT##*=}"
;;
--*)
warn "Ignoring unknown option: '${OPT}'"
;;
@ -76,10 +82,11 @@ done
[ -z "${SHA}" ] && die "The --sha256 option must be specified"
[ -z "${JOB}" ] && die "The --job option must be specified"
[ -z "${PLATFORM}" ] && die "The --platform option must be specified"
[ -z "${FILEPATH}" ] && die "The --main-binary-filepath option must be specified"
FILENAME="${1}"
if [ $(basename "${FILENAME}" .deb) = $(basename "${FILENAME}") ]; then
if [ $(basename "${FILEPATH}" .deb) = $(basename "${FILEPATH}") ]; then
echo "Not a Debian package"
exit 0
fi

View file

@ -75,7 +75,8 @@ info "processing UUID '${UUID}'"
DB="${DATA_DIR}/builder.sqlite3"
get_main_binary () {
sqlite3 "${DB}" "SELECT ba.localpath FROM build AS b
sqlite3 "${DB}" "SELECT '_artifacts/' || substr(lower(hex(ba.sha256)), 1, 2) || '/' || lower(hex(ba.sha256))
FROM build AS b
JOIN build_artifact AS ba ON ba.build = b.id AND b.main_binary = ba.id
WHERE uuid = '${UUID}';"
}
@ -84,17 +85,19 @@ BIN="${DATA_DIR}/$(get_main_binary)" || die "Failed to get main binary from data
[ -z "${BIN}" ] && die "No main-binary found in db '${DB}' for build '${UUID}'"
get_debug_binary () {
sqlite3 "${DB}" "SELECT ba.localpath FROM build AS b
sqlite3 "${DB}" "SELECT '_artifacts/' || substr(lower(hex(ba.sha256)), 1, 2) || '/' || lower(hex(ba.sha256))
FROM build AS b
JOIN build_artifact AS ba ON ba.build = b.id
WHERE
uuid = '${UUID}'
AND ba.localpath LIKE '%.debug';"
AND ba.filepath LIKE '%.debug';"
}
DEBUG_BIN_RELATIVE="$(get_debug_binary)" || die "Failed to get debug binary from database"
get_opam_switch () {
sqlite3 "${DB}" "SELECT ba.localpath FROM build AS b
sqlite3 "${DB}" "SELECT '_artifacts/' || substr(lower(hex(ba.sha256)), 1, 2) || '/' || lower(hex(ba.sha256))
FROM build AS b
JOIN build_artifact AS ba ON ba.build = b.id
WHERE
uuid = '${UUID}'

View file

@ -43,18 +43,15 @@ module Testable = struct
let file =
let equal (x : Builder_db.Rep.file) (y : Builder_db.Rep.file) =
Fpath.equal x.filepath y.filepath &&
Fpath.equal x.localpath y.localpath &&
Cstruct.equal x.sha256 y.sha256 &&
x.size = y.size
in
let pp ppf { Builder_db.Rep.filepath; localpath; sha256; size } =
let pp ppf { Builder_db.Rep.filepath; sha256; size } =
Format.fprintf ppf "{@[<v 1>@;<1 0>Builder_db.Rep.filepath = %a;@;<1 0>\
localpath = %a;@;<1 0>\
sha256 = %a;@;<1 0>\
size = %d;@;<1 0>\
@]@,}"
Fpath.pp filepath Fpath.pp localpath
Cstruct.hexdump_pp sha256 size
Fpath.pp filepath Cstruct.hexdump_pp sha256 size
in
Alcotest.testable pp equal
@ -133,11 +130,10 @@ let finish = Option.get (Ptime.of_float_s 1.)
let result = Builder.Exited 0
let main_binary =
let filepath = Result.get_ok (Fpath.of_string "bin/hello.sh") in
let localpath = Result.get_ok (Fpath.of_string "/dev/null") in
let data = "#!/bin/sh\necho Hello, World\n" in
let sha256 = Mirage_crypto.Hash.SHA256.digest (Cstruct.of_string data) in
let size = String.length data in
{ Builder_db.Rep.filepath; localpath; sha256; size }
{ Builder_db.Rep.filepath; sha256; size }
let main_binary2 =
let data = "#!/bin/sh\necho Hello, World 2\n" in
let sha256 = Mirage_crypto.Hash.SHA256.digest (Cstruct.of_string data) in
@ -149,7 +145,6 @@ let fail_if_none a =
Option.to_result ~none:(`Msg "Failed to retrieve") a
let add_test_build user_id (module Db : CONN) =
let r =
let open Builder_db in
Db.start () >>= fun () ->
Db.exec Job.try_add job_name >>= fun () ->
@ -161,9 +156,6 @@ let add_test_build user_id (module Db : CONN) =
Db.find last_insert_rowid () >>= fun main_binary_id ->
Db.exec Build.set_main_binary (id, main_binary_id) >>= fun () ->
Db.commit ()
in
Result.fold ~ok:Result.ok ~error:(fun _ -> Db.rollback ())
r
let with_build_db f () =
or_fail
@ -269,6 +261,14 @@ let test_artifact_get_by_build_uuid (module Db : CONN) =
get_opt "no build" >>| fun (_id, file) ->
Alcotest.(check Testable.file) "same file" file main_binary
let test_artifact_exists_true (module Db : CONN) =
Db.find Builder_db.Build_artifact.exists main_binary.sha256 >>| fun exists ->
Alcotest.(check bool) "main binary exists" true exists
let test_artifact_exists_false (module Db : CONN) =
Db.find Builder_db.Build_artifact.exists main_binary2.sha256 >>| fun exists ->
Alcotest.(check bool) "main binary2 doesn't exists" false exists
(* XXX: This test should fail because main_binary on the corresponding build
* references its main_binary. This is not the case now due to foreign key. *)
let test_artifact_remove_by_build (module Db : CONN) =
@ -306,6 +306,8 @@ let () =
"build-artifact", [
test_case "Get all by build" `Quick (with_build_db test_artifact_get_all_by_build);
test_case "Get by build uuid" `Quick (with_build_db test_artifact_get_by_build_uuid);
test_case "Artifact exists" `Quick (with_build_db test_artifact_exists_true);
test_case "Other artifact doesn't exists" `Quick (with_build_db test_artifact_exists_false);
test_case "Remove by build" `Quick (with_build_db test_artifact_remove_by_build);
];
]