Add input_id to build table

The input_id column is a checksum of the files relevant for reproducibility.
This commit is contained in:
Robur 2021-07-06 13:41:26 +00:00
parent be26e56fd4
commit 6ec40365ab
7 changed files with 174 additions and 15 deletions

View file

@ -162,6 +162,46 @@ let job_remove () datadir jobname =
in in
or_die 1 r or_die 1 r
let input_ids =
Caqti_request.collect
Caqti_type.unit
Builder_db.Rep.cstruct
"SELECT DISTINCT input_id FROM build WHERE input_id IS NOT NULL"
let main_artifact_hash =
Caqti_request.collect
Builder_db.Rep.cstruct
(Caqti_type.tup3 Builder_db.Rep.cstruct Builder_db.Rep.uuid Caqti_type.string)
{|
SELECT a.sha256, b.uuid, j.name FROM build_artifact a, build b, job j
WHERE b.input_id = ? AND a.id = b.main_binary AND b.job = j.id
|}
let verify_input_id () dbpath =
let r =
connect
(Uri.make ~scheme:"sqlite3" ~path:dbpath ~query:["create", ["false"]] ())
>>= fun (module Db : Caqti_blocking.CONNECTION) ->
Db.collect_list input_ids () >>= fun input_ids ->
List.fold_left (fun acc input_id ->
acc >>= fun () ->
Db.collect_list main_artifact_hash input_id >>| fun hashes ->
match hashes with
| (h, uuid, jobname) :: tl ->
List.iter (fun (h', uuid', _) ->
if Cstruct.equal h h' then
()
else
Logs.warn (fun m -> m "job %s input id %a with two different hashes (%a, %a), build %a and %a"
jobname Cstruct.hexdump_pp input_id
Cstruct.hexdump_pp h Cstruct.hexdump_pp h'
Uuidm.pp uuid Uuidm.pp uuid'))
tl
| [] -> ())
(Ok ()) input_ids
in
or_die 1 r
let help man_format cmds = function let help man_format cmds = function
| None -> `Help (man_format, None) | None -> `Help (man_format, None)
| Some cmd -> | Some cmd ->
@ -286,6 +326,10 @@ let job_remove_cmd =
(Cmdliner.Term.(pure job_remove $ setup_log $ datadir $ jobname), (Cmdliner.Term.(pure job_remove $ setup_log $ datadir $ jobname),
Cmdliner.Term.info ~doc "job-remove") Cmdliner.Term.info ~doc "job-remove")
let verify_input_id_cmd =
let doc = "verify that the main binary hash of all builds with the same input are equal" in
(Cmdliner.Term.(pure verify_input_id $ setup_log $ dbpath),
Cmdliner.Term.info ~doc "verify-input-id")
let help_cmd = let help_cmd =
let topic = let topic =
@ -307,5 +351,6 @@ let () =
default_cmd default_cmd
[help_cmd; migrate_cmd; [help_cmd; migrate_cmd;
user_add_cmd; user_update_cmd; user_remove_cmd; user_list_cmd; user_disable_cmd; user_add_cmd; user_update_cmd; user_remove_cmd; user_list_cmd; user_disable_cmd;
access_add_cmd; access_remove_cmd; job_remove_cmd] access_add_cmd; access_remove_cmd; job_remove_cmd;
verify_input_id_cmd ]
|> Cmdliner.Term.exit |> Cmdliner.Term.exit

View file

@ -123,5 +123,6 @@ let () =
actions (module M20210629); actions (module M20210629);
actions (module M20210630); actions (module M20210630);
actions (module M20210701); actions (module M20210701);
actions (module M20210706);
]) ])
|> Cmdliner.Term.exit |> Cmdliner.Term.exit

View file

@ -0,0 +1,99 @@
let new_version = 12L and old_version = 11L
let identifier = "2021-07-06"
let migrate_doc = "add a input_id column to the build table"
let rollback_doc = "remove the input_id column from the build table"
let add_input_id_to_build =
Caqti_request.exec
Caqti_type.unit
{| ALTER TABLE build ADD COLUMN input_id BLOB |}
let idx_build_job_start =
Caqti_request.exec Caqti_type.unit
"CREATE INDEX idx_build_job_start ON build(job, start_d DESC, start_ps DESC)"
let old_build =
Caqti_request.exec
Caqti_type.unit
{| CREATE TABLE build (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
uuid VARCHAR(36) NOT NULL UNIQUE,
start_d INTEGER NOT NULL,
start_ps INTEGER NOT NULL,
finish_d INTEGER NOT NULL,
finish_ps INTEGER NOT NULL,
result_kind TINYINT NOT NULL,
result_code INTEGER,
result_msg TEXT,
console BLOB NOT NULL,
script TEXT NOT NULL,
main_binary INTEGER,
user INTEGER NOT NULL,
job INTEGER NOT NULL,
FOREIGN KEY(main_binary) REFERENCES build_artifact(id) DEFERRABLE INITIALLY DEFERRED,
FOREIGN KEY(user) REFERENCES user(id),
FOREIGN KEY(job) REFERENCES job(id)
)
|}
let copy_build =
Caqti_request.exec
Caqti_type.unit
"INSERT INTO new_build SELECT id, uuid, start_d, start_ps, finish_d, finish_ps, result_kind, result_code, result_msg, console, script, main_binary, user, job FROM build"
let drop_build =
Caqti_request.exec
Caqti_type.unit
"DROP TABLE build"
let rename_build =
Caqti_request.exec
Caqti_type.unit
"ALTER TABLE new_build RENAME TO build"
let drop_input_id_from_build =
Caqti_request.exec
Caqti_type.unit
{| ALTER TABLE build DROP COLUMN input_id |}
let builds =
Caqti_request.collect
Caqti_type.unit
(Caqti_type.tup4
Builder_db.Rep.untyped_id
Builder_db.Rep.cstruct
Builder_db.Rep.cstruct
Builder_db.Rep.cstruct)
{| SELECT b.id, opam.sha256, env.sha256, system.sha256
FROM build b, build_artifact opam, build_artifact env, build_artifact system
WHERE opam.filepath = 'opam-switch' AND env.filepath = 'build-environment'
AND system.filepath = 'system-packages'
AND opam.build = b.id AND env.build = b.id AND system.build = b.id
|}
let set_input_id =
Caqti_request.exec
(Caqti_type.tup2 Builder_db.Rep.untyped_id Builder_db.Rep.cstruct)
"UPDATE build SET input_id = ?2 WHERE id = ?1"
open Rresult.R.Infix
let migrate _datadir (module Db : Caqti_blocking.CONNECTION) =
Grej.check_version ~user_version:old_version (module Db) >>= fun () ->
Db.exec add_input_id_to_build () >>= fun () ->
Db.collect_list builds () >>= fun builds ->
Grej.list_iter_result (fun (id, opam_sha, env_sha, pkg_sha) ->
let input_id = Mirage_crypto.Hash.SHA256.digest (Cstruct.concat [ opam_sha ; env_sha ; pkg_sha ]) in
Db.exec set_input_id (id, input_id))
builds >>= fun () ->
Db.exec (Grej.set_version new_version) ()
let rollback _datadir (module Db : Caqti_blocking.CONNECTION) =
Grej.check_version ~user_version:new_version (module Db) >>= fun () ->
Db.exec old_build () >>= fun () ->
Db.exec copy_build () >>= fun () ->
Db.exec drop_build () >>= fun () ->
Db.exec rename_build () >>= fun () ->
Db.exec idx_build_job_start () >>= fun () ->
Db.exec (Grej.set_version old_version) ()

View file

@ -4,7 +4,7 @@ open Rep
let application_id = 1234839235l let application_id = 1234839235l
(* Please update this when making changes! *) (* Please update this when making changes! *)
let current_version = 11L let current_version = 12L
type 'a id = 'a Rep.id type 'a id = 'a Rep.id
@ -322,6 +322,7 @@ module Build = struct
main_binary INTEGER, main_binary INTEGER,
user INTEGER NOT NULL, user INTEGER NOT NULL,
job INTEGER NOT NULL, job INTEGER NOT NULL,
input_id BLOB, -- sha256 (sha256<opam-switch> || sha256<build-environment> || sha256<system-packages>)
FOREIGN KEY(main_binary) REFERENCES build_artifact(id) DEFERRABLE INITIALLY DEFERRED, FOREIGN KEY(main_binary) REFERENCES build_artifact(id) DEFERRABLE INITIALLY DEFERRED,
FOREIGN KEY(user) REFERENCES user(id), FOREIGN KEY(user) REFERENCES user(id),
@ -457,12 +458,12 @@ module Build = struct
let add = let add =
Caqti_request.exec Caqti_request.exec
t (Caqti_type.tup2 t (Caqti_type.option cstruct))
{| INSERT INTO build {| INSERT INTO build
(uuid, start_d, start_ps, finish_d, finish_ps, (uuid, start_d, start_ps, finish_d, finish_ps,
result_kind, result_code, result_msg, console, script, main_binary, user, job) result_kind, result_code, result_msg, console, script, main_binary, user, job, input_id)
VALUES VALUES
(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|} |}
let get_by_hash = let get_by_hash =

View file

@ -171,7 +171,7 @@ sig
Caqti_request.t Caqti_request.t
val get_other_builds_with_same_output : val get_other_builds_with_same_output :
([`build] id, Meta.t, [ `Many | `One | `Zero ]) Caqti_request.t ([`build] id, Meta.t, [ `Many | `One | `Zero ]) Caqti_request.t
val add : (t, unit, [< `Many | `One | `Zero > `Zero ]) Caqti_request.t val add : (t * Cstruct.t option, unit, [< `Many | `One | `Zero > `Zero ]) Caqti_request.t
val get_by_hash : val get_by_hash :
(Cstruct.t, string * t, [< `Many | `One | `Zero > `One `Zero]) Caqti_request.t (Cstruct.t, string * t, [< `Many | `One | `Zero > `One `Zero]) Caqti_request.t
val set_main_binary : ([`build] id * [`build_artifact] id, unit, [< `Many | `One | `Zero > `Zero ]) Caqti_request.t val set_main_binary : ([`build] id * [`build_artifact] id, unit, [< `Many | `One | `Zero > `Zero ]) Caqti_request.t

View file

@ -235,6 +235,20 @@ let infer_section_and_synopsis artifacts =
in in
Some section, infer_synopsis_and_descr opam_switch Some section, infer_synopsis_and_descr opam_switch
let compute_input_id artifacts =
let get_hash filename =
match List.find_opt (fun b -> Fpath.equal b.Builder_db.filepath filename) artifacts with
| None -> None
| Some x -> Some x.sha256
in
match
get_hash (Fpath.v "opam-switch"),
get_hash (Fpath.v "build-environment"),
get_hash (Fpath.v "system-packages")
with
| Some a, Some b, Some c -> Some (Mirage_crypto.Hash.SHA256.digest (Cstruct.concat [a;b;c]))
| _ -> None
let add_build let add_build
datadir datadir
user_id user_id
@ -271,9 +285,10 @@ let add_build
let readme_tag = "readme.md" in let readme_tag = "readme.md" in
Db.exec Tag.try_add readme_tag >>= fun () -> Db.exec Tag.try_add readme_tag >>= fun () ->
Db.find Tag.get_id_by_name readme_tag >>= fun readme_id -> Db.find Tag.get_id_by_name readme_tag >>= fun readme_id ->
Db.exec Build.add { Build.uuid; start; finish; result; let input_id = compute_input_id artifacts in
Db.exec Build.add ({ Build.uuid; start; finish; result;
console; script = job.Builder.script; console; script = job.Builder.script;
main_binary = None; user_id; job_id } >>= fun () -> main_binary = None; user_id; job_id }, input_id) >>= fun () ->
Db.find last_insert_rowid () >>= fun id -> Db.find last_insert_rowid () >>= fun id ->
let sec_syn = infer_section_and_synopsis raw_artifacts in let sec_syn = infer_section_and_synopsis raw_artifacts in
let add_or_update tag_id tag_value = let add_or_update tag_id tag_value =

View file

@ -160,9 +160,8 @@ let add_test_build user_id (module Db : CONN) =
Db.start () >>= fun () -> Db.start () >>= fun () ->
Db.exec Job.try_add job_name >>= fun () -> Db.exec Job.try_add job_name >>= fun () ->
Db.find_opt Job.get_id_by_name job_name >>= fail_if_none >>= fun job_id -> Db.find_opt Job.get_id_by_name job_name >>= fail_if_none >>= fun job_id ->
Db.exec Build.add { Build.uuid; start; finish; result; console; script; Db.exec Build.add ({ Build.uuid; start; finish; result; console; script;
main_binary = None; user_id; main_binary = None; user_id; job_id }, None) >>= fun () ->
job_id } >>= fun () ->
Db.find last_insert_rowid () >>= fun id -> Db.find last_insert_rowid () >>= fun id ->
Db.exec Build_artifact.add (main_binary, id) >>= fun () -> Db.exec Build_artifact.add (main_binary, id) >>= fun () ->
Db.find last_insert_rowid () >>= fun main_binary_id -> Db.find last_insert_rowid () >>= fun main_binary_id ->
@ -229,9 +228,8 @@ let add_second_build (module Db : CONN) =
Db.find_opt User.get_user username >>= fail_if_none >>= fun (user_id, _) -> Db.find_opt User.get_user username >>= fail_if_none >>= fun (user_id, _) ->
Db.start () >>= fun () -> Db.start () >>= fun () ->
Db.find_opt Job.get_id_by_name job_name >>= fail_if_none >>= fun job_id -> Db.find_opt Job.get_id_by_name job_name >>= fail_if_none >>= fun job_id ->
Db.exec Build.add { Build.uuid; start; finish; result; console; script; Db.exec Build.add ({ Build.uuid; start; finish; result; console; script;
main_binary = None; user_id; job_id; main_binary = None; user_id; job_id; }, None) >>= fun () ->
} >>= fun () ->
Db.find last_insert_rowid () >>= fun id -> Db.find last_insert_rowid () >>= fun id ->
Db.exec Build_artifact.add (main_binary, id) >>= fun () -> Db.exec Build_artifact.add (main_binary, id) >>= fun () ->
Db.find last_insert_rowid () >>= fun main_binary_id -> Db.find last_insert_rowid () >>= fun main_binary_id ->