diff --git a/bin/builder_db_app.ml b/bin/builder_db_app.ml
index 10ff322..99d9923 100644
--- a/bin/builder_db_app.ml
+++ b/bin/builder_db_app.ml
@@ -16,6 +16,13 @@ let defer_foreign_keys =
   Caqti_type.unit ->. Caqti_type.unit @@
   "PRAGMA defer_foreign_keys = ON"
 
+let build_artifacts_to_orphan =
+  Builder_db.Rep.id `build ->* Builder_db.Rep.cstruct @@
+  {| SELECT a.sha256 FROM build_artifact a
+     WHERE a.build = ? AND
+       (SELECT COUNT(*) FROM build_artifact a2
+        WHERE a2.sha256 = a.sha256 AND a2.build <> a.build) = 0 |}
+
 let connect uri =
   let* (module Db : Caqti_blocking.CONNECTION) = Caqti_blocking.connect ~tweaks_version:(1,8) uri in
   let* () = Db.exec defer_foreign_keys () in
@@ -36,6 +43,16 @@ let do_migrate dbpath =
 let migrate () dbpath =
   or_die 1 (do_migrate dbpath)
 
+let artifacts_dir datadir = Fpath.(datadir / "_artifacts")
+let artifact_path sha256 =
+  let (`Hex sha256) = Hex.of_cstruct sha256 in
+  (* NOTE: [sha256] is 64 characters when it's a hex sha256 checksum *)
+  (* NOTE: We add the prefix to reduce the number of files in a directory - a
+     workaround for inferior filesystems. We can easily revert this by changing
+     this function and adding a migration. *)
+  let prefix = String.sub sha256 0 2 in
+  Fpath.(v "_artifacts" / prefix / sha256)
+
 let user_mod action dbpath scrypt_n scrypt_r scrypt_p username unrestricted =
   let scrypt_params = Builder_web_auth.scrypt_params ?scrypt_n ?scrypt_r ?scrypt_p () in
   let r =
@@ -154,6 +171,18 @@ let job_remove () datadir jobname =
           (match Bos.OS.Dir.delete ~recurse:true dir with
            | Ok _ -> ()
            | Error `Msg e -> Logs.warn (fun m -> m "failed to remove build directory %a: %s" Fpath.pp dir e));
+          let* () =
+            Db.iter_s build_artifacts_to_orphan
+              (fun sha256 ->
+                 let p = Fpath.(v datadir // artifact_path sha256) in
+                 match Bos.OS.Path.delete p with
+                 | Ok () -> Ok ()
+                 | Error `Msg e ->
+                   Logs.warn (fun m -> m "failed to remove orphan artifact %a: %s"
+                                 Fpath.pp p e);
+                   Ok ())
+              build_id
+          in
           let* () = Db.exec Builder_db.Build_artifact.remove_by_build build_id in
           Db.exec Builder_db.Build.remove build_id)
         (Ok ())
@@ -214,12 +243,12 @@ let num_build_artifacts =
   Caqti_type.unit ->! Caqti_type.int @@
   "SELECT count(*) FROM build_artifact"
 
-let build_artifacts : (unit, string * Uuidm.t * (Fpath.t * Fpath.t * Cstruct.t * int64), [ `One | `Zero | `Many ]) Caqti_request.t =
+let build_artifacts : (unit, string * Uuidm.t * (Fpath.t * Cstruct.t * int64), [ `One | `Zero | `Many ]) Caqti_request.t =
   Caqti_type.unit ->*
   Caqti_type.(tup3 string Builder_db.Rep.uuid
-               (tup4 Builder_db.Rep.fpath Builder_db.Rep.fpath Builder_db.Rep.cstruct int64))
+               (tup3 Builder_db.Rep.fpath Builder_db.Rep.cstruct int64))
   @@
-  {| SELECT job.name, b.uuid, a.filepath, a.localpath, a.sha256, a.size
+  {| SELECT job.name, b.uuid, a.filepath, a.sha256, a.size
      FROM build_artifact a, build b, job
      WHERE a.build = b.id AND b.job = job.id |}
 
@@ -257,36 +286,33 @@ let verify_data_dir () datadir =
     let idx = ref 0 in
     fun () -> incr idx; if !idx mod 100 = 0 then Logs.info (fun m -> m "%d" !idx);
   in
-  let verify_job_and_uuid ?fpath job uuid path =
+  let verify_job_and_uuid job uuid path =
     match Fpath.segs path with
-    | job' :: uuid' :: tl ->
+    | job' :: uuid' :: _tl ->
      if String.equal job job' then () else Logs.warn (fun m -> m "job names do not match: %s vs %s" job job');
      if String.equal (Uuidm.to_string uuid) uuid' then () else Logs.warn (fun m -> m "uuid does not match: %s vs %s" (Uuidm.to_string uuid) uuid');
-      (match fpath, tl with
-       | None, _ -> ()
-       | Some f, "output" :: tl ->
-         if Fpath.equal (Fpath.v (String.concat "/" tl)) f then
-           ()
-         else
-           Logs.err (fun m -> m "path (%a) and fpath (%a) do not match" Fpath.pp path Fpath.pp f)
-       | Some _, _ ->
-         Logs.err (fun m -> m "path is not of form //output/: %a" Fpath.pp path))
    | _ -> Logs.err (fun m -> m "path is not of form //...: %a" Fpath.pp path)
  in
  let* () =
-    Db.iter_s build_artifacts (fun (job, uuid, (fpath, lpath, sha, size)) ->
+    Db.iter_s build_artifacts (fun (_job, _uuid, (_fpath, sha256, size)) ->
        progress ();
-        verify_job_and_uuid ~fpath job uuid lpath;
-        let abs_path = Fpath.(v datadir // lpath) in
-        (match Bos.OS.File.read abs_path with
-         | Error (`Msg msg) -> Logs.err (fun m -> m "file %a not present: %s" Fpath.pp abs_path msg)
-         | Ok data ->
-           files_tracked := FpathSet.add lpath !files_tracked;
-           let s = Int64.of_int (String.length data) in
-           if s <> size then Logs.err (fun m -> m "File %a has different size (in DB %Lu on disk %Lu)" Fpath.pp abs_path size s);
-           let sh = Mirage_crypto.Hash.SHA256.digest (Cstruct.of_string data) in
-           if not (Cstruct.equal sha sh) then Logs.err (fun m -> m "File %a has different hash (in DB %a on disk %a" Fpath.pp abs_path Cstruct.hexdump_pp sha Cstruct.hexdump_pp sh)) ;
-        Ok ()
+        if not (FpathSet.mem (artifact_path sha256) !files_tracked) then
+          let abs_path = Fpath.(v datadir // artifact_path sha256) in
+          (match Bos.OS.File.read abs_path with
+           | Error (`Msg msg) -> Logs.err (fun m -> m "file %a not present: %s" Fpath.pp abs_path msg)
+           | Ok data ->
+             files_tracked := FpathSet.add (artifact_path sha256) !files_tracked;
+             let s = Int64.of_int (String.length data) in
+             if s <> size then Logs.err (fun m -> m "File %a has different size (in DB %Lu on disk %Lu)" Fpath.pp abs_path size s);
+             let sha256' = Mirage_crypto.Hash.SHA256.digest (Cstruct.of_string data) in
+             if not (Cstruct.equal sha256 sha256') then
+               Logs.err (fun m -> m "File %a has different hash (in DB %a on disk %a)"
+                            Fpath.pp abs_path
+                            Hex.pp (Hex.of_cstruct sha256)
+                            Hex.pp (Hex.of_cstruct sha256'))) ;
+          Ok ()
+        else
+          Ok ()
      ) ()
  in
  Db.iter_s script_and_console (fun (job, uuid, console, script) ->
@@ -460,7 +486,7 @@ module Verify_cache_dir = struct
              AND ba_opam_switch.filepath = 'opam-switch'
          LEFT JOIN build_artifact AS ba_debug_bin ON ba_debug_bin.build = b.id
-           AND ba_debug_bin.localpath LIKE '%.debug'
+           AND ba_debug_bin.filepath LIKE '%.debug'
    |}
 
  let check_viz_nonempty ~cachedir ~viz_typ ~hash =
@@ -681,9 +707,9 @@ let extract_full () datadir dest uuid =
  let out = console_of_string console in
  let* artifacts = Db.collect_list Builder_db.Build_artifact.get_all_by_build build_id in
  let* data =
-    List.fold_left (fun acc (_, { Builder_db.filepath; localpath; _ }) ->
+    List.fold_left (fun acc (_, { Builder_db.filepath; sha256; _ }) ->
        let* acc = acc in
-        let* data = Bos.OS.File.read Fpath.(v datadir // localpath) in
+        let* data = Bos.OS.File.read Fpath.(v datadir // artifact_path sha256) in
        Ok ((filepath, data) :: acc))
      (Ok [])
      artifacts
diff --git a/bin/migrations/builder_migrations.ml b/bin/migrations/builder_migrations.ml
index 862ffd3..ae84f6f 100644
--- a/bin/migrations/builder_migrations.ml
+++ b/bin/migrations/builder_migrations.ml
@@ -180,6 +180,7 @@ let () =
      actions (module M20211105);
      actions (module M20220509);
      actions (module M20230911);
+      actions (module M20230914);
    ]) |>
  Cmd.eval |>
  exit
diff --git a/bin/migrations/m20230914.ml b/bin/migrations/m20230914.ml
new file mode 100644
index 0000000..e1b79db
--- /dev/null
+++ b/bin/migrations/m20230914.ml
@@ -0,0 +1,162 @@
+let new_version = 18L and old_version = 17L
+and identifier = "2023-09-14"
+and migrate_doc = "Artifacts are stored content-addressed in the filesystem"
+and rollback_doc = "Artifacts are stored under their build's job name and uuid"
+
+open Grej.Syntax
+
+let new_build_artifact =
+  Caqti_type.unit ->. Caqti_type.unit @@
+  {| CREATE TABLE new_build_artifact (
+       id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
+       filepath TEXT NOT NULL,
+       sha256 BLOB NOT NULL,
+       size INTEGER NOT NULL,
+       build INTEGER NOT NULL,
+
+       FOREIGN KEY(build) REFERENCES build(id),
+       UNIQUE(build, filepath)
+     )
+  |}
+
+let old_build_artifact =
+  Caqti_type.unit ->. Caqti_type.unit @@
+  {| CREATE TABLE new_build_artifact (
+       id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
+       filepath TEXT NOT NULL, -- the path as in the build
+       localpath TEXT NOT NULL, -- local path to the file on disk
+       sha256 BLOB NOT NULL,
+       size INTEGER NOT NULL,
+       build INTEGER NOT NULL,
+
+       FOREIGN KEY(build) REFERENCES build(id),
+       UNIQUE(build, filepath)
+     )
+  |}
+
+let idx_build_artifact_sha256 =
+  Caqti_type.unit ->. Caqti_type.unit @@
+  "CREATE INDEX idx_build_artifact_sha256 ON build_artifact(sha256)"
+
+let idx_build_artifact_build =
+  Caqti_type.unit ->. Caqti_type.unit @@
+  "CREATE INDEX idx_build_artifact_build ON build_artifact(build)"
+
+let copy_new_build_artifact =
+  Caqti_type.(unit ->. unit) @@
+  {| INSERT INTO new_build_artifact(id, filepath, sha256, size, build)
+     SELECT id, filepath, sha256, size, build
+     FROM build_artifact
+  |}
+
+let copy_old_build_artifact =
+  Caqti_type.(unit ->. unit) @@
+  {| INSERT INTO new_build_artifact(id, filepath, localpath, sha256, size, build)
+     SELECT a.id, a.filepath,
+       j.name || '/' || b.uuid || '/output/' || a.filepath,
+       a.sha256, a.size, a.build
+     FROM build_artifact a, job j, build b
+     WHERE b.id = a.build AND j.id = b.job
+  |}
+
+let new_build_artifact_paths =
+  Caqti_type.unit ->* Caqti_type.(tup2 string string) @@
+  {| SELECT localpath, '_artifacts/' || substr(lower(hex(sha256)), 1, 2) || '/' || lower(hex(sha256))
+     FROM build_artifact
+  |}
+
+let old_build_artifact_paths =
+  Caqti_type.unit ->* Caqti_type.(tup2 string string) @@
+  {| SELECT '_artifacts/' || substr(lower(hex(a.sha256)), 1, 2) || '/' || lower(hex(a.sha256)),
+       j.name || '/' || b.uuid || '/output/' || a.filepath
+     FROM build_artifact a, job j, build b
+     WHERE b.id = a.build AND j.id = b.job
+  |}
+
+let drop_build_artifact =
+  Caqti_type.(unit ->. unit) @@
+  "DROP TABLE build_artifact"
+
+let rename_build_artifact =
+  Caqti_type.(unit ->. unit) @@
+  "ALTER TABLE new_build_artifact RENAME TO build_artifact"
+
+let move_paths ?force datadir (old_path, new_path) =
+  let old_path = Fpath.(datadir // v old_path) and new_path = Fpath.(datadir // v new_path) in
+  let* _created = Bos.OS.Dir.create (Fpath.parent new_path) in
+  Bos.OS.Path.move ?force old_path new_path
+
+let copy_paths datadir (old_path, new_path) =
+  let old_path = Fpath.(datadir // v old_path) and new_path = Fpath.(datadir // v new_path) in
+  let new_path_tmp = Fpath.(new_path + "tmp") in
+  let* _created = Bos.OS.Dir.create (Fpath.parent new_path) in
+  let cmd = Bos.Cmd.(v "cp" % p old_path % p new_path_tmp) in
+  let* () =
+    match Bos.OS.Cmd.run_status cmd with
+    | Ok `Exited 0 ->
+      Ok ()
+    | Ok status ->
+      let _ = Bos.OS.Path.delete new_path_tmp in
+      Error (`Msg (Fmt.str "cp failed: %a" Bos.OS.Cmd.pp_status status))
+    | Error _ as e ->
+      let _ = Bos.OS.Path.delete new_path_tmp in
+      e
+  in
+  Bos.OS.Path.move ~force:true new_path_tmp new_path
+
+let migrate datadir (module Db : Caqti_blocking.CONNECTION) =
+  let* () = Grej.check_version ~user_version:old_version (module Db) in
+  let* () = Db.exec new_build_artifact () in
+  let* () = Db.exec copy_new_build_artifact () in
+  let* () = Db.iter_s new_build_artifact_paths (move_paths ~force:true datadir) () in
+  let* () = Db.exec drop_build_artifact () in
+  let* () = Db.exec rename_build_artifact () in
+  let* () = Db.exec idx_build_artifact_sha256 () in
+  let* () = Db.exec idx_build_artifact_build () in
+  Db.exec (Grej.set_version new_version) ()
+
+let rollback datadir (module Db : Caqti_blocking.CONNECTION) =
+  let* () = Grej.check_version ~user_version:new_version (module Db) in
+  let* () = Db.exec old_build_artifact () in
+  let* () = Db.exec copy_old_build_artifact () in
+  let* () = Db.iter_s old_build_artifact_paths (copy_paths datadir) () in
+  let* () =
+    Db.iter_s old_build_artifact_paths
+      (fun (old_path, _new_path) ->
+         Bos.OS.Path.delete Fpath.(datadir // v old_path))
+      ()
+  in
+  let* () = Db.exec drop_build_artifact () in
+  let* () = Db.exec rename_build_artifact () in
+  let* () = Db.exec idx_build_artifact_sha256 () in
+  Db.exec (Grej.set_version old_version) ()
+
+(* migration failed but managed to move *some* files *)
+let fixup_migrate datadir (module Db : Caqti_blocking.CONNECTION) =
+  let* () = Grej.check_version ~user_version:old_version (module Db) in
+  let* () =
+    Db.iter_s new_build_artifact_paths
+      (fun (old_path, new_path) ->
+         let* old_exists = Bos.OS.Path.exists Fpath.(datadir // v old_path) in
+         let* new_exists = Bos.OS.Path.exists Fpath.(datadir // v new_path) in
+         if new_exists && not old_exists then
+           copy_paths datadir (new_path, old_path)
+         else Ok ())
+      ()
+  in
+  Db.iter_s new_build_artifact_paths
+    (fun (_old_path, new_path) ->
+       Bos.OS.Path.delete Fpath.(datadir // v new_path))
+    ()
+
+(* rollback failed but some or all artifacts were copied *)
+let fixup_rollback datadir (module Db : Caqti_blocking.CONNECTION) =
+  let* () = Grej.check_version ~user_version:new_version (module Db) in
+  Db.iter_s old_build_artifact_paths
+    (fun (old_path, new_path) ->
+       let* old_exists = Bos.OS.Path.exists Fpath.(datadir // v old_path) in
+       if old_exists then
+         Bos.OS.Path.delete Fpath.(datadir // v new_path)
+       else
+         move_paths datadir (new_path, old_path))
+    ()
diff --git a/db/builder_db.ml b/db/builder_db.ml
index cdcd190..226f2b9 100644
--- a/db/builder_db.ml
+++ b/db/builder_db.ml
@@ -11,7 +11,6 @@ type 'a id = 'a Rep.id
 
 type file = Rep.file = {
   filepath : Fpath.t;
-  localpath : Fpath.t;
   sha256 : Cstruct.t;
   size : int;
 }
@@ -140,7 +139,6 @@ module Build_artifact = struct
    {| CREATE TABLE build_artifact (
         id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
         filepath TEXT NOT NULL, -- the path as in the build
-         localpath TEXT NOT NULL, -- local path to the file on disk
         sha256 BLOB NOT NULL,
         size INTEGER NOT NULL,
         build INTEGER NOT NULL,
@@ -156,13 +154,13 @@ module Build_artifact = struct
  let get =
    id `build_artifact ->! file @@
-    {| SELECT filepath, localpath, sha256, size
+    {| SELECT filepath, sha256, size
       FROM build_artifact
       WHERE id = ? |}
 
  let get_by_build_uuid =
    Caqti_type.tup2 uuid fpath ->? Caqti_type.tup2 (id `build_artifact) file @@
    {| SELECT build_artifact.id, build_artifact.filepath,
-         build_artifact.localpath, build_artifact.sha256, build_artifact.size
+         build_artifact.sha256, build_artifact.size
       FROM build_artifact
       INNER JOIN build ON build.id = build_artifact.build
       WHERE build.uuid = ? AND build_artifact.filepath = ?
@@ -170,12 +168,16 @@ module Build_artifact = struct
  let get_all_by_build =
    id `build ->* Caqti_type.(tup2 (id `build_artifact) file) @@
-    "SELECT id, filepath, localpath, sha256, size FROM build_artifact WHERE build = ?"
+    "SELECT id, filepath, sha256, size FROM build_artifact WHERE build = ?"
+
+  let exists =
+    cstruct ->! Caqti_type.bool @@
+    "SELECT EXISTS(SELECT 1 FROM build_artifact WHERE sha256 = ?)"
 
  let add =
    Caqti_type.(tup2 file (id `build)) ->. Caqti_type.unit @@
-    "INSERT INTO build_artifact (filepath, localpath, sha256, size, build) \
-     VALUES (?, ?, ?, ?, ?)"
+    "INSERT INTO build_artifact (filepath, sha256, size, build) \
+     VALUES (?, ?, ?, ?)"
 
  let remove_by_build =
    id `build ->. Caqti_type.unit @@
@@ -316,7 +318,7 @@ module Build = struct
           b.uuid, b.start_d, b.start_ps, b.finish_d, b.finish_ps,
           b.result_code, b.result_msg, b.console, b.script,
           b.platform, b.main_binary, b.input_id, b.user, b.job,
-           a.filepath, a.localpath, a.sha256, a.size
+           a.filepath, a.sha256, a.size
         FROM build b, build_artifact a
         WHERE b.main_binary = a.id AND b.job = $1 AND b.platform = $2
           AND b.main_binary IS NOT NULL
@@ -442,7 +444,7 @@ module Build = struct
    {| SELECT b.uuid, b.start_d, b.start_ps, b.finish_d, b.finish_ps,
         b.result_code, b.result_msg, b.console, b.script,
         b.platform, b.main_binary, b.input_id, b.user, b.job,
-         a.filepath, a.localpath, a.sha256, a.size
+         a.filepath, a.sha256, a.size
       FROM build_artifact a
       INNER JOIN build b ON b.id = a.build
       WHERE a.sha256 = ?
@@ -593,6 +595,8 @@ let migrate = [
    "CREATE INDEX idx_build_main_binary ON build(main_binary)";
  Caqti_type.unit ->. Caqti_type.unit @@
    "CREATE INDEX idx_build_artifact_sha256 ON build_artifact(sha256)";
+  Caqti_type.unit ->. Caqti_type.unit @@
+    "CREATE INDEX idx_build_artifact_build ON build_artifact(build)";
  set_current_version;
  set_application_id;
 ]
@@ -606,6 +610,8 @@ let rollback = [
  Build.rollback;
  Job.rollback;
  Caqti_type.unit ->. Caqti_type.unit @@
+    "DROP INDEX IF EXISTS idx_build_artifact_build";
+  Caqti_type.unit ->. Caqti_type.unit @@
    "DROP INDEX IF EXISTS idx_build_artifact_sha256";
  Caqti_type.unit ->. Caqti_type.unit @@
    "DROP INDEX IF EXISTS idx_build_failed";
diff --git a/db/builder_db.mli b/db/builder_db.mli
index 2988c12..9d1f052 100644
--- a/db/builder_db.mli
+++ b/db/builder_db.mli
@@ -3,7 +3,6 @@ module Rep : sig
  type 'a id
  type file = {
    filepath : Fpath.t;
-    localpath : Fpath.t;
    sha256 : Cstruct.t;
    size : int;
  }
@@ -23,7 +22,6 @@ type 'a id = 'a Rep.id
 
 type file = Rep.file = {
  filepath : Fpath.t;
-  localpath : Fpath.t;
  sha256 : Cstruct.t;
  size : int;
 }
@@ -87,6 +85,7 @@ module Build_artifact : sig
    Caqti_request.t
  val get_all_by_build :
    ([`build] id, [`build_artifact] id * file, [ `Many | `One | `Zero ]) Caqti_request.t
+  val exists : (Cstruct.t, bool, [ `One ]) Caqti_request.t
  val add :
    (file * [`build] id, unit, [ `Zero ]) Caqti_request.t
  val remove_by_build :
diff --git a/db/representation.ml b/db/representation.ml
index 4d94b93..1cc1722 100644
--- a/db/representation.ml
+++ b/db/representation.ml
@@ -30,7 +30,6 @@ let id_to_int64 (id : 'a id) : int64 = id
 
 type file = {
  filepath : Fpath.t;
-  localpath : Fpath.t;
  sha256 : Cstruct.t;
  size : int;
 }
@@ -63,24 +62,24 @@ let cstruct =
  Caqti_type.custom ~encode ~decode Caqti_type.octets
 
 let file =
-  let encode { filepath; localpath; sha256; size } =
-    Ok (filepath, localpath, sha256, size) in
-  let decode (filepath, localpath, sha256, size) =
-    Ok { filepath; localpath; sha256; size } in
-  Caqti_type.custom ~encode ~decode Caqti_type.(tup4 fpath fpath cstruct int)
+  let encode { filepath; sha256; size } =
+    Ok (filepath, sha256, size) in
+  let decode (filepath, sha256, size) =
+    Ok { filepath; sha256; size } in
+  Caqti_type.custom ~encode ~decode Caqti_type.(tup3 fpath cstruct int)
 
 let file_opt =
-  let rep = Caqti_type.(tup4 (option fpath) (option fpath) (option cstruct) (option int)) in
+  let rep = Caqti_type.(tup3 (option fpath) (option cstruct) (option int)) in
  let encode = function
-    | Some { filepath; localpath; sha256; size } ->
-      Ok (Some filepath, Some localpath, Some sha256, Some size)
+    | Some { filepath; sha256; size } ->
+      Ok (Some filepath, Some sha256, Some size)
    | None ->
-      Ok (None, None, None, None)
+      Ok (None, None, None)
  in
  let decode = function
-    | (Some filepath, Some localpath, Some sha256, Some size) ->
-      Ok (Some { filepath; localpath; sha256; size })
-    | (None, None, None, None) ->
+    | (Some filepath, Some sha256, Some size) ->
+      Ok (Some { filepath; sha256; size })
+    | (None, None, None) ->
      Ok None
    | _ ->
      (* This should not happen if the database is well-formed *)
diff --git a/lib/builder_web.ml b/lib/builder_web.ml
index 4a40dcf..feb596e 100644
--- a/lib/builder_web.ml
+++ b/lib/builder_web.ml
@@ -211,9 +211,9 @@ module Viz_aux = struct
      match typ with
      | `Treemap ->
        let debug_binary =
-          let bin = Fpath.base main_binary.localpath in
+          let bin = Fpath.base main_binary.filepath in
          List.find_opt
-            (fun p -> Fpath.(equal (bin + "debug") (base p.localpath)))
+            (fun p -> Fpath.(equal (bin + "debug") (base p.filepath)))
            artifacts
        in
        begin
@@ -226,7 +226,7 @@ module Viz_aux = struct
      | `Dependencies ->
        let opam_switch =
          List.find_opt
-            (fun p -> Fpath.(equal (v "opam-switch") (base p.localpath)))
+            (fun p -> Fpath.(equal (v "opam-switch") (base p.filepath)))
            artifacts
        in
        Model.not_found opam_switch
@@ -435,8 +435,8 @@ let routes ~datadir ~cachedir ~configdir ~expired_jobs =
      | _ ->
        Model.build_artifact_data datadir file
        |> if_error "Error getting build artifact"
-          ~log:(fun e -> Log.warn (fun m -> m "Error getting build artifact data for file %a in %a: %a"
-                            Fpath.pp file.Builder_db.filepath Fpath.pp file.Builder_db.localpath
+          ~log:(fun e -> Log.warn (fun m -> m "Error getting build artifact data for file %a: %a"
+                            Fpath.pp file.Builder_db.filepath
                            pp_error e)) >>= fun data ->
        let headers = [
          "Content-Type", mime_lookup file.Builder_db.filepath;
diff --git a/lib/dream_tar.ml b/lib/dream_tar.ml
index b5a5899..2cad5f6 100644
--- a/lib/dream_tar.ml
+++ b/lib/dream_tar.ml
@@ -67,7 +67,7 @@ let targz_response datadir finish (files : Builder_db.file list) (stream : Dream
  in
  Lwt_list.iter_s (fun file ->
      let hdr = header_of_file finish file in
-      write_block hdr Fpath.(datadir // file.localpath) state)
+      write_block hdr Fpath.(datadir // Model.artifact_path file) state)
    files >>= fun () ->
  Writer.really_write state Tar.Header.zero_block >>= fun () ->
  Writer.really_write state Tar.Header.zero_block >>= fun () ->
diff --git a/lib/model.ml b/lib/model.ml
index 91e25b4..59ad22e 100644
--- a/lib/model.ml
+++ b/lib/model.ml
@@ -19,6 +19,14 @@ let not_found = function
  | Some v -> Lwt_result.return v
 
 let staging datadir = Fpath.(datadir / "_staging")
+let artifact_path artifact =
+  let (`Hex sha256) = Hex.of_cstruct artifact.Builder_db.sha256 in
+  (* NOTE: [sha256] is 64 characters when it's a hex sha256 checksum *)
+  (* NOTE: We add the prefix to reduce the number of files in a directory - a
+     workaround for inferior filesystems. We can easily revert this by changing
+     this function and adding a migration. *)
+  let prefix = String.sub sha256 0 2 in
+  Fpath.(v "_artifacts" / prefix / sha256)
 
 let read_file datadir filepath =
  let filepath = Fpath.(datadir // filepath) in
@@ -44,14 +52,14 @@ let build_artifact_by_id id (module Db : CONN) =
  Db.find Builder_db.Build_artifact.get id
 
 let build_artifact_data datadir file =
-  read_file datadir file.Builder_db.localpath
+  read_file datadir (artifact_path file)
 
 let build_artifacts build (module Db : CONN) =
  Db.collect_list Builder_db.Build_artifact.get_all_by_build build >|=
  List.map snd
 
 let solo5_manifest datadir file =
-  let buf = Owee_buf.map_binary Fpath.(to_string (datadir // file.Builder_db.localpath)) in
+  let buf = Owee_buf.map_binary Fpath.(to_string (datadir // artifact_path file)) in
  Solo5_elftool.query_manifest buf |> Result.to_option
 
 let platforms_of_job id (module Db : CONN) =
@@ -196,11 +204,11 @@ let cleanup_staging datadir (module Db : Caqti_lwt.CONNECTION) =
         (cleanup_staged staged))
    stageds
 
-let save file data =
+let save path data =
  let open Lwt.Infix in
  Lwt.catch
    (fun () ->
-       Lwt_io.open_file ~mode:Lwt_io.Output (Fpath.to_string file) >>= fun oc ->
+       Lwt_io.open_file ~mode:Lwt_io.Output (Fpath.to_string path) >>= fun oc ->
       Lwt_io.write oc data >>= fun () ->
       Lwt_io.close oc
       |> Lwt_result.ok)
@@ -209,33 +217,29 @@ let save file data =
      Lwt_result.fail (`Msg (Unix.error_message e))
    | e -> Lwt.fail e)
 
-let save_file dir staging (filepath, data) =
-  let size = String.length data in
-  let sha256 = Mirage_crypto.Hash.SHA256.digest (Cstruct.of_string data) in
-  let localpath = Fpath.append dir filepath in
-  let destpath = Fpath.append staging filepath in
-  Lwt_result.lift (Bos.OS.Dir.create (Fpath.parent destpath)) >>= fun _ ->
-  save destpath data >|= fun () ->
-  { Builder_db.filepath; localpath; sha256; size }
-
-let save_files dir staging files =
+let save_artifacts staging artifacts =
  List.fold_left
-    (fun r file ->
-       r >>= fun acc ->
-       save_file dir staging file >>= fun file ->
-       Lwt_result.return (file :: acc))
-    (Lwt_result.return [])
-    files
+    (fun r (file, data) ->
+       r >>= fun () ->
+       let (`Hex sha256) = Hex.of_cstruct file.Builder_db.sha256 in
+       let destpath = Fpath.(staging / sha256) in
+       save destpath data)
+    (Lwt_result.return ())
+    artifacts
 
-let save_all staging_dir (job : Builder.script_job) uuid artifacts =
-  let build_dir = Fpath.(v job.Builder.name / Uuidm.to_string uuid) in
-  let output_dir = Fpath.(build_dir / "output")
-  and staging_output_dir = Fpath.(staging_dir / "output") in
-  Lwt.return (Bos.OS.Dir.create staging_output_dir) >>= fun _ ->
-  save_files output_dir staging_output_dir artifacts >>= fun artifacts ->
-  Lwt_result.return artifacts
-
-let commit_files datadir staging_dir job_name uuid =
+let commit_files datadir staging_dir job_name uuid artifacts =
+  (* First we move the artifacts *)
+  List.fold_left
+    (fun r artifact ->
+       r >>= fun () ->
+       let (`Hex sha256) = Hex.of_cstruct artifact.Builder_db.sha256 in
+       let src = Fpath.(staging_dir / sha256) in
+       let dest = Fpath.(datadir // artifact_path artifact) in
+       Lwt.return (Bos.OS.Dir.create (Fpath.parent dest)) >>= fun _created ->
+       Lwt.return (Bos.OS.Path.move ~force:true src dest))
+    (Lwt_result.return ())
+    artifacts >>= fun () ->
+  (* Now the staging dir only contains script & console *)
  let job_dir = Fpath.(datadir / job_name) in
  let dest = Fpath.(job_dir / Uuidm.to_string uuid) in
  Lwt.return (Bos.OS.Dir.create job_dir) >>= fun _ ->
@@ -324,6 +328,25 @@ let prepare_staging staging_dir =
  then Lwt_result.fail (`Msg "build directory already exists")
  else Lwt_result.return ()
 
+(* saving:
+   - for each artifact compute its sha256 checksum -- calling Lwt.pause in
+     between
+   - lookup artifact sha256 in the database and filter them out of the list: not_in_db
+   - mkdir -p _staging/uuid/
+   - save console & script to _staging/uuid/
+   - save each artifact in not_in_db as _staging/uuid/sha256
+   committing:
+   - for each artifact mv _staging/uuid/sha256 _artifacts/sha256
+     (or _artifacts/prefix(sha256)/sha256 where prefix(sha256) is the first two hex digits in sha256)
+   - now _staging/uuid only contains console & script so we mv _staging/uuid _staging/job/uuid
+   potential issues:
+   - race condition in uploading same artifact:
+     * if the artifact already exists in the database and thus filesystem then nothing is done
+     * if the artifact is added to the database and/or filesystem we atomically overwrite it
+   - input_id depends on a sort order?
+ *)
+
+
 let add_build
    ~datadir
    ~cachedir
@@ -344,16 +367,35 @@ let add_build
          e)
      x
  in
-  let artifacts_to_preserve =
-    let not_interesting p =
-      String.equal (Fpath.basename p) "README.md" || String.equal (Fpath.get_ext p) ".build-hashes"
-    in
-    List.filter (fun (p, _) -> not (not_interesting p)) raw_artifacts
+  let not_interesting p =
+    String.equal (Fpath.basename p) "README.md" || String.equal (Fpath.get_ext p) ".build-hashes"
  in
+  begin
+    List.fold_left
+      (fun r (filepath, data) ->
+         r >>= fun acc ->
+         if not_interesting filepath then
+           Lwt_result.return acc
+         else
+           let sha256 = Mirage_crypto.Hash.SHA256.digest (Cstruct.of_string data)
+           and size = String.length data in
+           Lwt_result.ok (Lwt.pause ()) >|= fun () ->
+           ({ filepath; sha256; size }, data) :: acc)
+      (Lwt_result.return [])
+      raw_artifacts
+  end >>= fun artifacts ->
  or_cleanup (prepare_staging staging_dir) >>= fun () ->
  or_cleanup (save_console_and_script staging_dir job_name uuid console job.Builder.script)
  >>= fun (console, script) ->
-  or_cleanup (save_all staging_dir job uuid artifacts_to_preserve) >>= fun artifacts ->
+  List.fold_left
+    (fun r ((f, _) as artifact) ->
+       r >>= fun acc ->
+       Db.find Builder_db.Build_artifact.exists f.sha256 >|= fun exists ->
+       if exists then acc else artifact :: acc)
+    (Lwt_result.return [])
+    artifacts >>= fun artifacts_to_save ->
+  or_cleanup (save_artifacts staging_dir artifacts_to_save) >>= fun () ->
+  let artifacts = List.map fst artifacts in
  let r =
    Db.start () >>= fun () ->
    Db.exec Job.try_add job_name >>= fun () ->
@@ -422,7 +464,7 @@ let add_build
      (Lwt_result.return ())
      remaining_artifacts_to_add >>= fun () ->
    Db.commit () >>= fun () ->
-    commit_files datadir staging_dir job_name uuid >|= fun () ->
+    commit_files datadir staging_dir job_name uuid artifacts >|= fun () ->
    main_binary
  in
  Lwt_result.bind_lwt_error (or_cleanup r)
@@ -451,7 +493,8 @@ let add_build
              "--uuid=" ^ uuid ; "--platform=" ^ platform ;
              "--cache-dir=" ^ Fpath.to_string cachedir ; "--data-dir=" ^ Fpath.to_string datadir ;
-              fp_str main_binary.localpath ])
+              "--main-binary-filepath=" ^ Fpath.to_string main_binary.filepath ;
+              fp_str Fpath.(datadir // artifact_path main_binary) ])
        in
        Log.debug (fun m -> m "executing hooks with %s" args);
        let dir = Fpath.(configdir / "upload-hooks") in
diff --git a/lib/model.mli b/lib/model.mli
index 2b36a7c..c062f92 100644
--- a/lib/model.mli
+++ b/lib/model.mli
@@ -5,6 +5,7 @@ val pp_error : Format.formatter -> error -> unit
 val not_found : 'a option -> ('a, [> `Not_found ]) result Lwt.t
 
 val staging : Fpath.t -> Fpath.t
+val artifact_path : Builder_db.file -> Fpath.t
 
 val cleanup_staging : Fpath.t -> Caqti_lwt.connection ->
  (unit, [> `Msg of string ]) result Lwt.t
diff --git a/lib/views.ml b/lib/views.ml
index 9e9e3e3..8a597f8 100644
--- a/lib/views.ml
+++ b/lib/views.ml
@@ -188,7 +188,7 @@ let artifact
    ~basename
    ~job_name
    ~build
-    ~file:{ Builder_db.filepath; localpath = _; sha256; size }
+    ~file:{ Builder_db.filepath; sha256; size }
  =
  let artifact_link =
    Link.Job_build_artifact.make
diff --git a/packaging/FreeBSD-repo.sh b/packaging/FreeBSD-repo.sh
index 9d76033..05e3a69 100755
--- a/packaging/FreeBSD-repo.sh
+++ b/packaging/FreeBSD-repo.sh
@@ -32,6 +32,8 @@ Options:
    Hex encoded SHA256 digest of the main binary.
  --job=STRING
    Job name that was built.
+  --main-binary-filepath=STRING
+    The file path of the main binary.
 EOM
  exit 1
 }
@@ -39,6 +41,7 @@ EOM
 BUILD_TIME=
 SHA=
 JOB=
+FILEPATH=
 
 while [ $# -gt 1 ]; do
  OPT="$1"
@@ -53,6 +56,9 @@ while [ $# -gt 1 ]; do
    --job=*)
      JOB="${OPT##*=}"
      ;;
+    --main-binary-filepath=*)
+      FILEPATH="${OPT##*=}"
+      ;;
    --*)
      warn "Ignoring unknown option: '${OPT}'"
      ;;
@@ -67,13 +73,14 @@ done
 [ -z "${BUILD_TIME}" ] && die "The --build-time option must be specified"
 [ -z "${SHA}" ] && die "The --sha256 option must be specified"
 [ -z "${JOB}" ] && die "The --job option must be specified"
+[ -z "${FILEPATH}" ] && die "The --main-binary-filepath option must be specified"
 
 FILENAME="${1}"
 
 : "${REPO:="/usr/local/www/pkg"}"
 : "${REPO_KEY:="/usr/local/etc/builder-web/repo.key"}"
 
-if [ "$(basename "${FILENAME}" .pkg)" = "$(basename "${FILENAME}")" ]; then
+if [ "$(basename "${FILEPATH}" .pkg)" = "$(basename "${FILEPATH}")" ]; then
  echo "Not a FreeBSD package"
  exit 0
 fi
diff --git a/packaging/dpkg-repo.sh b/packaging/dpkg-repo.sh
index 4cfd61d..6b47cf0 100755
--- a/packaging/dpkg-repo.sh
+++ b/packaging/dpkg-repo.sh
@@ -36,6 +36,8 @@ Options:
    Job name that was built.
  --platform=STRING
    Platform name on which the build was performed.
+  --main-binary-filepath=STRING
+    The file path of the main binary.
 EOM
  exit 1
 }
@@ -44,6 +46,7 @@ BUILD_TIME=
 SHA=
 JOB=
 PLATFORM=
+FILEPATH=
 
 while [ $# -gt 1 ]; do
  OPT="$1"
@@ -61,6 +64,9 @@ while [ $# -gt 1 ]; do
    --platform=*)
      PLATFORM="${OPT##*=}"
      ;;
+    --main-binary-filepath=*)
+      FILEPATH="${OPT##*=}"
+      ;;
    --*)
      warn "Ignoring unknown option: '${OPT}'"
      ;;
@@ -76,10 +82,11 @@ done
 [ -z "${SHA}" ] && die "The --sha256 option must be specified"
 [ -z "${JOB}" ] && die "The --job option must be specified"
 [ -z "${PLATFORM}" ] && die "The --platform option must be specified"
+[ -z "${FILEPATH}" ] && die "The --main-binary-filepath option must be specified"
 
 FILENAME="${1}"
 
-if [ $(basename "${FILENAME}" .deb) = $(basename "${FILENAME}") ]; then
+if [ $(basename "${FILEPATH}" .deb) = $(basename "${FILEPATH}") ]; then
  echo "Not a Debian package"
  exit 0
 fi
diff --git a/packaging/visualizations.sh b/packaging/visualizations.sh
index a2f5c8d..5e575df 100755
--- a/packaging/visualizations.sh
+++ b/packaging/visualizations.sh
@@ -75,7 +75,8 @@ info "processing UUID '${UUID}'"
 DB="${DATA_DIR}/builder.sqlite3"
 
 get_main_binary () {
-    sqlite3 "${DB}" "SELECT ba.localpath FROM build AS b
+    sqlite3 "${DB}" "SELECT '_artifacts/' || substr(lower(hex(ba.sha256)), 1, 2) || '/' || lower(hex(ba.sha256))
+        FROM build AS b
        JOIN build_artifact AS ba ON ba.build = b.id AND b.main_binary = ba.id
        WHERE uuid = '${UUID}';"
 }
@@ -84,17 +85,19 @@ BIN="${DATA_DIR}/$(get_main_binary)" || die "Failed to get main binary from data
 [ -z "${BIN}" ] && die "No main-binary found in db '${DB}' for build '${UUID}'"
 
 get_debug_binary () {
-    sqlite3 "${DB}" "SELECT ba.localpath FROM build AS b
+    sqlite3 "${DB}" "SELECT '_artifacts/' || substr(lower(hex(ba.sha256)), 1, 2) || '/' || lower(hex(ba.sha256))
+        FROM build AS b
        JOIN build_artifact AS ba ON ba.build = b.id
        WHERE uuid = '${UUID}'
-        AND ba.localpath LIKE '%.debug';"
+        AND ba.filepath LIKE '%.debug';"
 }
 
 DEBUG_BIN_RELATIVE="$(get_debug_binary)" || die "Failed to get debug binary from database"
 
 get_opam_switch () {
-    sqlite3 "${DB}" "SELECT ba.localpath FROM build AS b
+    sqlite3 "${DB}" "SELECT '_artifacts/' || substr(lower(hex(ba.sha256)), 1, 2) || '/' || lower(hex(ba.sha256))
+        FROM build AS b
        JOIN build_artifact AS ba ON ba.build = b.id
        WHERE uuid = '${UUID}'
diff --git a/test/test_builder_db.ml b/test/test_builder_db.ml
index ce2d257..db56022 100644
--- a/test/test_builder_db.ml
+++ b/test/test_builder_db.ml
@@ -43,18 +43,15 @@ module Testable = struct
  let file =
    let equal (x : Builder_db.Rep.file) (y : Builder_db.Rep.file) =
      Fpath.equal x.filepath y.filepath &&
-      Fpath.equal x.localpath y.localpath &&
      Cstruct.equal x.sha256 y.sha256 &&
      x.size = y.size
    in
-    let pp ppf { Builder_db.Rep.filepath; localpath; sha256; size } =
+    let pp ppf { Builder_db.Rep.filepath; sha256; size } =
      Format.fprintf ppf "{@[@;<1 0>Builder_db.Rep.filepath = %a;@;<1 0>\
-                         localpath = %a;@;<1 0>\
                         sha256 = %a;@;<1 0>\
                         size = %d;@;<1 0>\
                         @]@,}"
-        Fpath.pp filepath Fpath.pp localpath
-        Cstruct.hexdump_pp sha256 size
+        Fpath.pp filepath Cstruct.hexdump_pp sha256 size
    in
    Alcotest.testable pp equal
@@ -133,11 +130,10 @@ let finish = Option.get (Ptime.of_float_s 1.)
 let result = Builder.Exited 0
 let main_binary =
  let filepath = Result.get_ok (Fpath.of_string "bin/hello.sh") in
-  let localpath = Result.get_ok (Fpath.of_string "/dev/null") in
  let data = "#!/bin/sh\necho Hello, World\n" in
  let sha256 = Mirage_crypto.Hash.SHA256.digest (Cstruct.of_string data) in
  let size = String.length data in
-  { Builder_db.Rep.filepath; localpath; sha256; size }
+  { Builder_db.Rep.filepath; sha256; size }
 let main_binary2 =
  let data = "#!/bin/sh\necho Hello, World 2\n" in
  let sha256 = Mirage_crypto.Hash.SHA256.digest (Cstruct.of_string data) in
@@ -149,21 +145,17 @@ let fail_if_none a =
  Option.to_result ~none:(`Msg "Failed to retrieve") a
 
 let add_test_build user_id (module Db : CONN) =
-  let r =
-    let open Builder_db in
-    Db.start () >>= fun () ->
-    Db.exec Job.try_add job_name >>= fun () ->
-    Db.find_opt Job.get_id_by_name job_name >>= fail_if_none >>= fun job_id ->
-    Db.exec Build.add { Build.uuid; start; finish; result; console; script; platform;
-                        main_binary = None; input_id = None; user_id; job_id } >>= fun () ->
-    Db.find last_insert_rowid () >>= fun id ->
-    Db.exec Build_artifact.add (main_binary, id) >>= fun () ->
-    Db.find last_insert_rowid () >>= fun main_binary_id ->
-    Db.exec Build.set_main_binary (id, main_binary_id) >>= fun () ->
-    Db.commit ()
-  in
-  Result.fold ~ok:Result.ok ~error:(fun _ -> Db.rollback ())
-    r
+  let open Builder_db in
+  Db.start () >>= fun () ->
+  Db.exec Job.try_add job_name >>= fun () ->
+  Db.find_opt Job.get_id_by_name job_name >>= fail_if_none >>= fun job_id ->
+  Db.exec Build.add { Build.uuid; start; finish; result; console; script; platform;
+                      main_binary = None; input_id = None; user_id; job_id } >>= fun () ->
+  Db.find last_insert_rowid () >>= fun id ->
+  Db.exec Build_artifact.add (main_binary, id) >>= fun () ->
+  Db.find last_insert_rowid () >>= fun main_binary_id ->
+  Db.exec Build.set_main_binary (id, main_binary_id) >>= fun () ->
+  Db.commit ()
 
 let with_build_db f () =
  or_fail
@@ -269,6 +261,14 @@ let test_artifact_get_by_build_uuid (module Db : CONN) =
  get_opt "no build" >>| fun (_id, file) ->
  Alcotest.(check Testable.file) "same file" file main_binary
 
+let test_artifact_exists_true (module Db : CONN) =
+  Db.find Builder_db.Build_artifact.exists main_binary.sha256 >>| fun exists ->
+  Alcotest.(check bool) "main binary exists" true exists
+
+let test_artifact_exists_false (module Db : CONN) =
+  Db.find Builder_db.Build_artifact.exists main_binary2.sha256 >>| fun exists ->
+  Alcotest.(check bool) "main binary2 doesn't exists" false exists
+
 (* XXX: This test should fail because main_binary on the corresponding build
  * references its main_binary. This is not the case now due to foreign key. *)
 let test_artifact_remove_by_build (module Db : CONN) =
@@ -306,6 +306,8 @@ let () =
    "build-artifact", [
      test_case "Get all by build" `Quick (with_build_db test_artifact_get_all_by_build);
      test_case "Get by build uuid" `Quick (with_build_db test_artifact_get_by_build_uuid);
+      test_case "Artifact exists" `Quick (with_build_db test_artifact_exists_true);
+      test_case "Other artifact doesn't exists" `Quick (with_build_db test_artifact_exists_false);
      test_case "Remove by build" `Quick (with_build_db test_artifact_remove_by_build);
    ];
  ]
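
For reference, a minimal standalone sketch of the path scheme the patch introduces: an artifact is addressed purely by its SHA-256 checksum and stored below _artifacts/, with the first two hex digits of the digest as a fan-out directory. The helper name below (artifact_relpath) is illustrative and not part of the patch; it mirrors artifact_path in bin/builder_db_app.ml and lib/model.ml.

  (* sketch, assuming [hex] is the lowercase hex encoding of a SHA-256 digest,
     i.e. 64 characters *)
  let artifact_relpath hex =
    let prefix = String.sub hex 0 2 in
    Filename.concat "_artifacts" (Filename.concat prefix hex)

  (* e.g. a digest whose hex encoding starts with "3f" ends up under
     "_artifacts/3f/<64 hex chars>" *)

Deduplication follows from the same scheme: add_build checks Build_artifact.exists for each uploaded file's checksum and stages only those not yet in the database, and job_remove deletes a file from disk only when build_artifacts_to_orphan reports that no other build references the same sha256.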