Merge pull request 'Implement builder-db vacuum {older-than,latest-n}' (#185) from vacuum into main

Reviewed-on: #185
This commit is contained in:
Reynir Björnsson 2024-02-13 15:08:59 +00:00
commit 97b8bb1d85
6 changed files with 264 additions and 49 deletions

View file

@ -148,6 +148,26 @@ let access_remove () dbpath username jobname =
in
or_die 1 r
let delete_build datadir (module Db : Caqti_blocking.CONNECTION) jobname id uuid =
let dir = Fpath.(v datadir / jobname / Uuidm.to_string uuid) in
(match Bos.OS.Dir.delete ~recurse:true dir with
| Ok _ -> ()
| Error `Msg e -> Logs.warn (fun m -> m "failed to remove build directory %a: %s" Fpath.pp dir e));
let* () =
Db.iter_s build_artifacts_to_orphan
(fun sha256 ->
let p = Fpath.(v datadir // artifact_path sha256) in
match Bos.OS.Path.delete p with
| Ok () -> Ok ()
| Error `Msg e ->
Logs.warn (fun m -> m "failed to remove orphan artifact %a: %s"
Fpath.pp p e);
Ok ())
id
in
let* () = Db.exec Builder_db.Build_artifact.remove_by_build id in
Db.exec Builder_db.Build.remove id
let job_remove () datadir jobname =
let dbpath = datadir ^ "/builder.sqlite3" in
let r =
@ -167,24 +187,7 @@ let job_remove () datadir jobname =
let* () =
List.fold_left (fun r (build_id, build) ->
let* () = r in
let dir = Fpath.(v datadir / jobname / Uuidm.to_string build.Builder_db.Build.uuid) in
(match Bos.OS.Dir.delete ~recurse:true dir with
| Ok _ -> ()
| Error `Msg e -> Logs.warn (fun m -> m "failed to remove build directory %a: %s" Fpath.pp dir e));
let* () =
Db.iter_s build_artifacts_to_orphan
(fun sha256 ->
let p = Fpath.(v datadir // artifact_path sha256) in
match Bos.OS.Path.delete p with
| Ok () -> Ok ()
| Error `Msg e ->
Logs.warn (fun m -> m "failed to remove orphan artifact %a: %s"
Fpath.pp p e);
Ok ())
build_id
in
let* () = Db.exec Builder_db.Build_artifact.remove_by_build build_id in
Db.exec Builder_db.Build.remove build_id)
delete_build datadir (module Db) jobname build_id build.Builder_db.Build.uuid)
(Ok ())
builds
in
@ -202,6 +205,96 @@ let job_remove () datadir jobname =
in
or_die 1 r
let vacuum datadir (module Db : Caqti_blocking.CONNECTION) platform_opt job_id predicate =
let* jobname = Db.find Builder_db.Job.get job_id in
let* builds =
match predicate with
| `Date older_than ->
Db.collect_list Builder_db.Build.get_builds_older_than (job_id, platform_opt, older_than)
| `Latest latest_n ->
Db.collect_list Builder_db.Build.get_builds_excluding_latest_n (job_id, platform_opt, latest_n)
| `Latest_successful latest_n ->
let* latest_n =
Db.find_opt Builder_db.Build.get_nth_latest_successful
(job_id, platform_opt, latest_n)
in
match latest_n with
| None ->
Ok []
| Some (id, latest_n) ->
let+ builds =
Db.collect_list Builder_db.Build.get_builds_older_than
(job_id, platform_opt, latest_n.finish)
in
(* Unfortunately, get_builds_older_than is non-strict comparison;
so we need to filter out [latest_n]. *)
List.filter (fun (id', _) -> id <> id') builds
in
let pp_reason ppf = function
| `Date older_than ->
Format.fprintf ppf "has no builds older than %a" (Ptime.pp_rfc3339 ()) older_than
| `Latest n ->
Format.fprintf ppf "has fewer than %d builds" n
| `Latest_successful n ->
Format.fprintf ppf "has fewer than %d successful builds" n
in
if builds = [] then
(* NOTE: this function may be called on *all* jobs, and in that case maybe
this is too verbose? *)
Logs.info (fun m -> m "Job %s %a; not removing any builds"
jobname pp_reason predicate);
List.fold_left (fun r (build_id, build) ->
let* () = r in
let* () = Db.start () in
let* () = Db.exec defer_foreign_keys () in
match
delete_build datadir (module Db) jobname build_id
build.Builder_db.Build.uuid
with
| Ok () -> Db.commit ()
| Error _ as e ->
let* () = Db.rollback () in
e)
(Ok ())
builds
let vacuum () datadir platform_opt jobnames predicate =
let dbpath = datadir ^ "/builder.sqlite3" in
let r =
let* (module Db : Caqti_blocking.CONNECTION) =
connect (Uri.make ~scheme:"sqlite3" ~path:dbpath ~query:["create", ["false"]] ())
in
let* jobs =
match jobnames with
| [] ->
(* We default to all jobs if no jobnames were specified *)
let* jobs = Db.collect_list Builder_db.Job.get_all_with_section_synopsis () in
Ok (List.map (fun (job_id, _, _, _) -> job_id) jobs)
| _ :: _ ->
let* (jobs, unknown_jobnames) =
List.fold_left
(fun r jobname ->
let* (jobs, unknown_jobnames) = r in
let* job_id_opt = Db.find_opt Builder_db.Job.get_id_by_name jobname in
match job_id_opt with
| Some job_id -> Ok (job_id :: jobs, unknown_jobnames)
| None -> Ok (jobs, jobname :: unknown_jobnames))
(Ok ([], []))
jobnames
in
match unknown_jobnames with
| [] -> Ok jobs
| _ :: _ ->
Error (`Msg ("Unknown job(s): " ^ String.concat ", " unknown_jobnames))
in
List.fold_left (fun r jobid ->
let* () = r in
vacuum datadir (module Db) platform_opt jobid predicate)
(Ok ())
jobs
in
or_die 1 r
let input_ids =
Caqti_type.unit ->* Builder_db.Rep.cstruct @@
"SELECT DISTINCT input_id FROM build WHERE input_id IS NOT NULL"
@ -728,81 +821,89 @@ let help man_format cmds = function
else `Error (true, "Unknown command: " ^ cmd)
let dbpath =
let doc = "sqlite3 database path" in
let doc = "sqlite3 database path." in
Cmdliner.Arg.(value &
opt non_dir_file (Builder_system.default_datadir ^ "/builder.sqlite3") &
info ~doc ["dbpath"])
let dbpath_new =
let doc = "sqlite3 database path" in
let doc = "sqlite3 database path." in
Cmdliner.Arg.(value &
opt string (Builder_system.default_datadir ^ "/builder.sqlite3") &
info ~doc ["dbpath"])
let datadir =
let doc = "data directory" in
let doc = "Data directory." in
let env = Cmdliner.Cmd.Env.info "BUILDER_WEB_DATADIR" in
Cmdliner.Arg.(value &
opt dir Builder_system.default_datadir &
info ~doc ["datadir"; "d"])
info ~doc ~env ["datadir"; "d"])
let cachedir =
let doc = "cache directory" in
let doc = "Cache directory." in
let env = Cmdliner.Cmd.Env.info "BUILDER_WEB_CACHEDIR" in
Cmdliner.Arg.(value &
opt (some dir) None &
info ~doc ["cachedir"])
info ~doc ~env ["cachedir"])
let jobname =
let doc = "jobname" in
let doc = "Jobname." in
Cmdliner.Arg.(required &
pos 0 (some string) None &
info ~doc ~docv:"JOBNAME" [])
let username =
let doc = "username" in
let doc = "Username." in
Cmdliner.Arg.(required &
pos 0 (some string) None &
info ~doc ~docv:"USERNAME" [])
let password_iter =
let doc = "password hash count" in
let doc = "Password hash count." in
Cmdliner.Arg.(value &
opt (some int) None &
info ~doc ["hash-count"])
let scrypt_n =
let doc = "scrypt n parameter" in
let doc = "scrypt n parameter." in
Cmdliner.Arg.(value &
opt (some int) None &
info ~doc ["scrypt-n"])
let scrypt_r =
let doc = "scrypt r parameter" in
let doc = "scrypt r parameter." in
Cmdliner.Arg.(value &
opt (some int) None &
info ~doc ["scrypt-r"])
let scrypt_p =
let doc = "scrypt p parameter" in
let doc = "scrypt p parameter." in
Cmdliner.Arg.(value &
opt (some int) None &
info ~doc ["scrypt-p"])
let unrestricted =
let doc = "unrestricted user" in
let doc = "Unrestricted user." in
Cmdliner.Arg.(value & flag & info ~doc [ "unrestricted" ])
let job =
let doc = "job" in
let doc = "Job." in
Cmdliner.Arg.(required &
pos 1 (some string) None &
info ~doc ~docv:"JOB" [])
let build =
let doc = "build uuid" in
let doc = "Build uuid." in
Cmdliner.Arg.(required &
pos 0 (some string) None &
info ~doc ~docv:"BUILD" [])
let platform =
let doc = "Platform." in
Cmdliner.Arg.(value &
opt (some string) None &
info ~doc ~docv:"PLATFORM" ["platform"])
let full_dest =
let doc = "path to write build file" in
Cmdliner.Arg.(value & opt string "full" &
@ -876,6 +977,102 @@ let job_remove_cmd =
let info = Cmd.info ~doc "job-remove" in
Cmd.v info term
let vacuum_cmd =
let jobs =
Arg.(value & opt_all string [] & info ~doc:"Job(s). Can be passed multiple times." ~docv:"JOB" ["job"])
in
let ptime_conv =
let parse s =
match Ptime.of_rfc3339 s with
| Ok (ptime, (None | Some 0), _) ->
Ok (`Date ptime)
| Ok _ -> Error (`Msg "only UTC timezone is allowed")
| Error `RFC3339 (_range, e) ->
Error (`Msg (Format.asprintf "bad RFC3339 date-time: %a" Ptime.pp_rfc3339_error e))
and pp ppf (`Date ptime) =
Ptime.pp_rfc3339 () ppf ptime
in
Arg.conv (parse, pp)
in
let older_than =
let doc = "cut-off date-time" in
Arg.(required & pos 0 (some ptime_conv) None & info ~doc ~docv:"OLDER-THAN" [])
in
(* TODO(reynir): for now we disallow 0 so as to avoid ending up with jobs
without builds. I'm unsure how well builder-web works with empty jobs.
Then again we don't do this check for older-than... *)
let latest_n =
let doc = "latest N" in
let latest_n =
let parse s =
match Arg.(conv_parser int) s with
| Ok n when n > 0 -> Ok (`Latest n)
| Ok _ -> Error (`Msg "must be positive integer")
| Error _ as e -> e
and pp ppf (`Latest n) =
Arg.(conv_printer int) ppf n
in
Arg.conv (parse, pp)
in
Arg.(required & pos 0 (some latest_n) None & info ~doc ~docv:"LATEST-N" [])
in
let latest_n_succesful =
let doc = "latest N successful" in
let latest_n =
let parse s =
match Arg.(conv_parser int) s with
| Ok n when n > 0 -> Ok (`Latest_successful n)
| Ok _ -> Error (`Msg "must be positive integer")
| Error _ as e -> e
and pp ppf (`Latest_successful n) =
Arg.(conv_printer int) ppf n
in
Arg.conv (parse, pp)
in
Arg.(required & pos 0 (some latest_n) None & info ~doc ~docv:"LATEST-N" [])
in
let job_default_txt =
"By default all jobs are vacuumed, unless any jobs are specified using --job."
in
let vacuum_older_than =
let doc =
Printf.sprintf "Remove builds older than a date. %s" job_default_txt
in
let info = Cmd.info ~doc "older-than" in
let term =
Term.(const vacuum $ setup_log $ datadir $ platform $ jobs $ older_than)
in
Cmd.v info term
in
let vacuum_except_latest_n =
let doc =
Printf.sprintf "Remove all builds except for the latest N builds (successful or not). %s"
job_default_txt
in
let info = Cmd.info ~doc "except-latest" in
let term =
Term.(const vacuum $ setup_log $ datadir $ platform $ jobs $ latest_n)
in
Cmd.v info term
in
let vacuum_except_latest_n_successful =
let doc =
Printf.sprintf "Remove all builds except for builds newer than the Nth latest successful build. %s"
job_default_txt
in
let info = Cmd.info ~doc "except-latest-successful" in
let term =
Term.(const vacuum $ setup_log $ datadir $ platform $ jobs $ latest_n_succesful)
in
Cmd.v info term
in
let doc = "Remove old builds" in
Cmd.group (Cmd.info ~doc "vacuum") [
vacuum_older_than;
vacuum_except_latest_n;
vacuum_except_latest_n_successful;
]
let extract_full_cmd =
let doc = "extract a build from the database" in
let term = Term.(
@ -929,6 +1126,7 @@ let () =
verify_input_id_cmd;
verify_data_dir_cmd;
verify_cache_dir_cmd;
extract_full_cmd ]
extract_full_cmd;
vacuum_cmd ]
|> Cmdliner.Cmd.eval
|> exit

View file

@ -196,10 +196,11 @@ let ip_port : (Ipaddr.V4.t * int) Arg.conv =
let datadir =
let doc = "data directory" in
let docv = "DATA_DIR" in
let env = Cmdliner.Cmd.Env.info "BUILDER_WEB_DATADIR" in
Arg.(
value &
opt dir Builder_system.default_datadir &
info [ "d"; "datadir" ] ~doc ~docv
info ~env [ "d"; "datadir" ] ~doc ~docv
)
let cachedir =

View file

@ -61,9 +61,10 @@ let help man_format migrations = function
let datadir =
let doc = "data directory containing builder.sqlite3 and data files" in
let env = Cmdliner.Cmd.Env.info "BUILDER_WEB_DATADIR" in
Cmdliner.Arg.(value &
opt dir Builder_system.default_datadir &
info ~doc ["datadir"; "d"])
info ~env ~doc ["datadir"; "d"])
let setup_log =
let setup_log level =

View file

@ -352,8 +352,8 @@ module Build = struct
|}
let get_builds_older_than =
Caqti_type.(tup3 (id `job) (option string) Rep.ptime) ->* t @@
{| SELECT uuid, start_d, start_ps, finish_d, finish_ps,
Caqti_type.(tup3 (id `job) (option string) Rep.ptime) ->* Caqti_type.tup2 (id `build) t @@
{| SELECT id, uuid, start_d, start_ps, finish_d, finish_ps,
result_code, result_msg, console, script,
platform, main_binary, input_id, user, job
FROM build
@ -364,8 +364,8 @@ module Build = struct
|}
let get_builds_excluding_latest_n =
Caqti_type.(tup3 (id `job) (option string) int) ->* t @@
{| SELECT uuid, start_d, start_ps, finish_d, finish_ps,
Caqti_type.(tup3 (id `job) (option string) int) ->* Caqti_type.tup2 (id `build) t @@
{| SELECT id, uuid, start_d, start_ps, finish_d, finish_ps,
result_code, result_msg, console, script,
platform, main_binary, input_id, user, job
FROM build
@ -376,6 +376,19 @@ module Build = struct
|}
(* "LIMIT -1 OFFSET n" is all rows except the first n *)
let get_nth_latest_successful =
Caqti_type.(tup3 (id `job) (option string) int) ->? Caqti_type.tup2 (id `build) t @@
{| SELECT id, uuid, start_d, start_ps, finish_d, finish_ps,
result_code, result_msg, console, script,
platform, main_binary, input_id, user, job
FROM build
WHERE job = $1
AND ($2 IS NULL OR platform = $2)
AND main_binary IS NOT NULL
ORDER BY start_d DESC, start_ps DESC
LIMIT 1 OFFSET $3
|}
let get_latest_successful =
Caqti_type.(tup2 (id `job) (option string)) ->? t @@
{| SELECT

View file

@ -130,9 +130,11 @@ sig
([`job] id * string option, t, [ `One | `Zero ])
Caqti_request.t
val get_builds_older_than :
([`job] id * string option * Ptime.t, t, [ `Many | `One | `Zero ]) Caqti_request.t
([`job] id * string option * Ptime.t, [`build] id * t, [ `Many | `One | `Zero ]) Caqti_request.t
val get_builds_excluding_latest_n :
([`job] id * string option * int, t, [ `Many | `One | `Zero ]) Caqti_request.t
([`job] id * string option * int, [`build] id * t, [ `Many | `One | `Zero ]) Caqti_request.t
val get_nth_latest_successful :
([`job] id * string option * int, [`build] id * t, [ `One | `Zero ]) Caqti_request.t
val get_previous_successful_different_output :
([`build] id, t, [ `One | `Zero ])
Caqti_request.t

View file

@ -281,10 +281,10 @@ let test_get_builds_older_than (module Db : CONN) =
let date = Option.get (Ptime.of_float_s (3600. /. 2.)) in
Db.find_opt Builder_db.Job.get_id_by_name job_name >>= fail_if_none >>= fun job_id ->
Db.collect_list Builder_db.Build.get_builds_older_than (job_id, None, date) >>= fun builds ->
let builds = List.map (fun { Builder_db.Build.uuid; _ } -> uuid) builds in
let builds = List.map (fun (_, { Builder_db.Build.uuid; _ }) -> uuid) builds in
Alcotest.(check (list Testable.uuid)) "last build" builds [ uuid ];
Db.collect_list Builder_db.Build.get_builds_older_than (job_id, None, Ptime_clock.now ()) >>= fun builds ->
let builds = List.map (fun { Builder_db.Build.uuid; _ } -> uuid) builds in
let builds = List.map (fun (_, { Builder_db.Build.uuid; _ }) -> uuid) builds in
(* NOTE(dinosaure): from the most recent to the older. *)
Alcotest.(check (list Testable.uuid)) "last builds" builds [ uuid'; uuid ];
Ok ()
@ -293,19 +293,19 @@ let test_builds_excluding_latest_n (module Db : CONN) =
add_second_build (module Db) >>= fun () ->
Db.find_opt Builder_db.Job.get_id_by_name job_name >>= fail_if_none >>= fun job_id ->
Db.collect_list Builder_db.Build.get_builds_excluding_latest_n (job_id, None, 1) >>= fun builds ->
let builds = List.map (fun { Builder_db.Build.uuid; _ } -> uuid) builds in
let builds = List.map (fun (_, { Builder_db.Build.uuid; _ }) -> uuid) builds in
Alcotest.(check (list Testable.uuid)) "keep recent build" builds [ uuid ];
Db.collect_list Builder_db.Build.get_builds_excluding_latest_n (job_id, None, 2) >>= fun builds ->
let builds = List.map (fun { Builder_db.Build.uuid; _ } -> uuid) builds in
let builds = List.map (fun (_, { Builder_db.Build.uuid; _ }) -> uuid) builds in
Alcotest.(check (list Testable.uuid)) "keep 2 builds" builds [];
Db.collect_list Builder_db.Build.get_builds_excluding_latest_n (job_id, None, 3) >>= fun builds ->
let builds = List.map (fun { Builder_db.Build.uuid; _ } -> uuid) builds in
let builds = List.map (fun (_, { Builder_db.Build.uuid; _ }) -> uuid) builds in
Alcotest.(check (list Testable.uuid)) "last more builds than we have" builds [];
Db.collect_list Builder_db.Build.get_builds_excluding_latest_n (job_id, None, 0) >>= fun builds ->
let builds = List.map (fun { Builder_db.Build.uuid; _ } -> uuid) builds in
let builds = List.map (fun (_, { Builder_db.Build.uuid; _ }) -> uuid) builds in
Alcotest.(check (list Testable.uuid)) "delete all builds" builds [ uuid'; uuid ];
Db.collect_list Builder_db.Build.get_builds_excluding_latest_n (job_id, None, -1) >>= fun builds ->
let builds = List.map (fun { Builder_db.Build.uuid; _ } -> uuid) builds in
let builds = List.map (fun (_, { Builder_db.Build.uuid; _ }) -> uuid) builds in
Alcotest.(check (list Testable.uuid)) "test an incomprehensible argument (-1)" builds [ uuid'; uuid ];
Ok ()