revise startup, address urls pointing to same sha256 and support mirrors (upstream and in opam file) #24

Merged
hannes merged 7 commits from startup into main 2024-11-20 10:38:22 +00:00
Showing only changes of commit da4533c2d3 - Show all commits

View file

@ -9,6 +9,10 @@ module K = struct
let doc = Arg.info ~doc:"Only check the cache" ["check"] in let doc = Arg.info ~doc:"Only check the cache" ["check"] in
Mirage_runtime.register_arg Arg.(value & flag doc) Mirage_runtime.register_arg Arg.(value & flag doc)
let skip_download =
let doc = Arg.info ~doc:"Skip downloading archives" ["skip-download"] in
Mirage_runtime.register_arg Arg.(value & flag doc)
let upstream_caches = let upstream_caches =
let doc = Arg.info ~doc:"Upstream caches (e.g. https://opam.ocaml.org/cache)" ["upstream-cache"] in let doc = Arg.info ~doc:"Upstream caches (e.g. https://opam.ocaml.org/cache)" ["upstream-cache"] in
hannes marked this conversation as resolved Outdated

"Upstream caches to use internally (e.g. https://opam.ocaml.org/cache). This makes opam-mirror try the cache(s) before going to the source and mirrors. This does not change the published "archive-mirrors:" value in the /repo endpoint."

"Upstream caches to use internally (e.g. https://opam.ocaml.org/cache). This makes opam-mirror try the cache(s) before going to the source and mirrors. This does *not* change the published "archive-mirrors:" value in the /repo endpoint."

actually, if I'm not misguided, we first go to the source, and only thereafter to mirror(s). Is this a good semantics?

actually, if I'm not misguided, we first go to the source, and only thereafter to mirror(s). Is this a good semantics?

And please feel free to push the documentation updates directly to the branch.

And please feel free to push the documentation updates directly to the branch.

Hm it turns out the exact semantics is a bit complicated to explain. You are right that we first to go the source! Then we go to the mirrors or the cache depending on how the URLs sort as they both go into a string set.

I will give this some thought and suggest a change.

Hm it turns out the exact semantics is a bit complicated to explain. You are right that we first to go the source! Then we go to the mirrors or the cache **depending on how the URLs sort** as they both go into a string set. I will give this some thought and suggest a change.

Latest commit splits mirror URLs from upstream cache URLs so the mirror URLs are tried first before the upstream caches, and adds a longer description of the option.

Latest commit splits mirror URLs from upstream cache URLs so the mirror URLs are tried first before the upstream caches, and adds a longer description of the option.
Mirage_runtime.register_arg Arg.(value & opt_all string [] doc) Mirage_runtime.register_arg Arg.(value & opt_all string [] doc)
@ -136,6 +140,10 @@ module Make
in in
Lwt_stream.from more Lwt_stream.from more
let sha256s = Hashtbl.create 13
let empty () = Hashtbl.clear sha256s

this code, using a global hashtable, is ugly and should be revised.

this code, using a global hashtable, is ugly and should be revised.
let find_urls acc path data = let find_urls acc path data =
if Mirage_kv.Key.basename path = "opam" then if Mirage_kv.Key.basename path = "opam" then
let path = Mirage_kv.Key.to_string path in let path = Mirage_kv.Key.to_string path in
@ -156,8 +164,25 @@ module Make
if HM.cardinal csums = 0 then if HM.cardinal csums = 0 then
(add_parse_error path ("no checksums for " ^ url); (add_parse_error path ("no checksums for " ^ url);
acc) acc)
else else begin
let url' =
match HM.find_opt `SHA256 csums with
| None -> url
| Some hash ->
match Hashtbl.find_opt sha256s hash with
| None -> Hashtbl.add sha256s hash url; url
| Some url' ->
if not (String.equal url url') then
Logs.warn (fun m -> m "same hash for url %s and %s" url url');
url'
in
let mirrors = SSet.of_list mirrors in let mirrors = SSet.of_list mirrors in
let url, mirrors =
if String.equal url url' then
url, mirrors
else
url', SSet.add url mirrors
in
SM.update url (function SM.update url (function
| None -> Some (csums, SSet.union mirrors (upstream csums)) | None -> Some (csums, SSet.union mirrors (upstream csums))
| Some (csums', mirrors') -> | Some (csums', mirrors') ->
@ -173,7 +198,8 @@ module Make
"mismatching hashes for %s: %s vs %s" "mismatching hashes for %s: %s vs %s"
url (hm_to_s csums') (hm_to_s csums)); url (hm_to_s csums') (hm_to_s csums));
None None
end) acc) acc url_csums end) acc
end) acc url_csums
else else
acc acc
@ -271,6 +297,7 @@ module Make
{ md5s = SM.empty ; sha512s = SM.empty ; checked = Some KS.empty ; dev; dev_md5s; dev_sha512s ; dev_swap } { md5s = SM.empty ; sha512s = SM.empty ; checked = Some KS.empty ; dev; dev_md5s; dev_sha512s ; dev_swap }
let add_checked t path = let add_checked t path =
Logs.info (fun m -> m "add checked %a" Mirage_kv.Key.pp path);
match t.checked with match t.checked with
| None -> () | None -> ()
| Some s -> t.checked <- Some (KS.add path s) | Some s -> t.checked <- Some (KS.add path s)
@ -620,6 +647,7 @@ module Make
let mtime = Option.value ~default:0 Ptime.(Span.to_int_s (to_span now)) in let mtime = Option.value ~default:0 Ptime.(Span.to_int_s (to_span now)) in
let urls = ref SM.empty in let urls = ref SM.empty in
entries_of_git ~mtime store repo urls >>= fun entries -> entries_of_git ~mtime store repo urls >>= fun entries ->
Git.empty ();
let t = Tar.out ~level:Ustar entries in let t = Tar.out ~level:Ustar entries in
let t = Tar_gz.out_gzipped ~level:4 ~mtime:(Int32.of_int mtime) Gz.Unix t in let t = Tar_gz.out_gzipped ~level:4 ~mtime:(Int32.of_int mtime) Gz.Unix t in
let buf = Buffer.create 1024 in let buf = Buffer.create 1024 in
@ -944,6 +972,7 @@ stamp: %S
Lwt.return_unit Lwt.return_unit
| false -> | false ->
let rec download url mirrors = let rec download url mirrors =
Logs.info (fun m -> m "downloading %s (%u mirrors)" url (SSet.cardinal mirrors));
let retry () = let retry () =
if SSet.is_empty mirrors then begin if SSet.is_empty mirrors then begin
decr remaining_downloads; decr remaining_downloads;
@ -957,25 +986,28 @@ stamp: %S
in in
let quux, body_init = Disk.init_write disk csums in let quux, body_init = Disk.init_write disk csums in
add_to_active url (Ptime.v (Pclock.now_d_ps ())); add_to_active url (Ptime.v (Pclock.now_d_ps ()));
Http_mirage_client.request http_client url (Disk.write_partial disk quux url) body_init >>= function if not (K.skip_download ()) then
| Ok (resp, r) -> Http_mirage_client.request http_client url (Disk.write_partial disk quux url) body_init >>= function
begin match r with | Ok (resp, r) ->
| Error `Bad_response -> begin match r with
add_failed url (Ptime.v (Pclock.now_d_ps ())) | Error `Bad_response ->
(`Bad_response (resp.status, resp.reason)); add_failed url (Ptime.v (Pclock.now_d_ps ()))
retry () (`Bad_response (resp.status, resp.reason));
| Error `Write_error e -> retry ()
add_failed url (Ptime.v (Pclock.now_d_ps ())) (`Write_error e); | Error `Write_error e ->
retry () add_failed url (Ptime.v (Pclock.now_d_ps ())) (`Write_error e);
| Error `Swap e -> retry ()
add_failed url (Ptime.v (Pclock.now_d_ps ())) (`Swap e); | Error `Swap e ->
retry () add_failed url (Ptime.v (Pclock.now_d_ps ())) (`Swap e);
| Ok (digests, body) -> retry ()
decr remaining_downloads; | Ok (digests, body) ->
Disk.finalize_write disk quux ~url body csums digests decr remaining_downloads;
end Disk.finalize_write disk quux ~url body csums digests
| Error me -> end
add_failed url (Ptime.v (Pclock.now_d_ps ())) (`Mimic me); | Error me ->
add_failed url (Ptime.v (Pclock.now_d_ps ())) (`Mimic me);
retry ()
else
retry () retry ()
in in
download url mirrors) download url mirrors)