revise startup, address urls pointing to same sha256 and support mirrors (upstream and in opam file) #24
1 changed files with 53 additions and 21 deletions
|
@ -9,6 +9,10 @@ module K = struct
|
|||
let doc = Arg.info ~doc:"Only check the cache" ["check"] in
|
||||
Mirage_runtime.register_arg Arg.(value & flag doc)
|
||||
|
||||
let skip_download =
|
||||
let doc = Arg.info ~doc:"Skip downloading archives" ["skip-download"] in
|
||||
Mirage_runtime.register_arg Arg.(value & flag doc)
|
||||
|
||||
let upstream_caches =
|
||||
let doc = Arg.info ~doc:"Upstream caches (e.g. https://opam.ocaml.org/cache)" ["upstream-cache"] in
|
||||
hannes marked this conversation as resolved
Outdated
|
||||
Mirage_runtime.register_arg Arg.(value & opt_all string [] doc)
|
||||
|
@ -136,6 +140,10 @@ module Make
|
|||
in
|
||||
Lwt_stream.from more
|
||||
|
||||
let sha256s = Hashtbl.create 13
|
||||
|
||||
let empty () = Hashtbl.clear sha256s
|
||||
hannes
commented
this code, using a global hashtable, is ugly and should be revised. this code, using a global hashtable, is ugly and should be revised.
|
||||
|
||||
let find_urls acc path data =
|
||||
if Mirage_kv.Key.basename path = "opam" then
|
||||
let path = Mirage_kv.Key.to_string path in
|
||||
|
@ -156,8 +164,25 @@ module Make
|
|||
if HM.cardinal csums = 0 then
|
||||
(add_parse_error path ("no checksums for " ^ url);
|
||||
acc)
|
||||
else
|
||||
else begin
|
||||
let url' =
|
||||
match HM.find_opt `SHA256 csums with
|
||||
| None -> url
|
||||
| Some hash ->
|
||||
match Hashtbl.find_opt sha256s hash with
|
||||
| None -> Hashtbl.add sha256s hash url; url
|
||||
| Some url' ->
|
||||
if not (String.equal url url') then
|
||||
Logs.warn (fun m -> m "same hash for url %s and %s" url url');
|
||||
url'
|
||||
in
|
||||
let mirrors = SSet.of_list mirrors in
|
||||
let url, mirrors =
|
||||
if String.equal url url' then
|
||||
url, mirrors
|
||||
else
|
||||
url', SSet.add url mirrors
|
||||
in
|
||||
SM.update url (function
|
||||
| None -> Some (csums, SSet.union mirrors (upstream csums))
|
||||
| Some (csums', mirrors') ->
|
||||
|
@ -173,7 +198,8 @@ module Make
|
|||
"mismatching hashes for %s: %s vs %s"
|
||||
url (hm_to_s csums') (hm_to_s csums));
|
||||
None
|
||||
end) acc) acc url_csums
|
||||
end) acc
|
||||
end) acc url_csums
|
||||
else
|
||||
acc
|
||||
|
||||
|
@ -271,6 +297,7 @@ module Make
|
|||
{ md5s = SM.empty ; sha512s = SM.empty ; checked = Some KS.empty ; dev; dev_md5s; dev_sha512s ; dev_swap }
|
||||
|
||||
let add_checked t path =
|
||||
Logs.info (fun m -> m "add checked %a" Mirage_kv.Key.pp path);
|
||||
match t.checked with
|
||||
| None -> ()
|
||||
| Some s -> t.checked <- Some (KS.add path s)
|
||||
|
@ -620,6 +647,7 @@ module Make
|
|||
let mtime = Option.value ~default:0 Ptime.(Span.to_int_s (to_span now)) in
|
||||
let urls = ref SM.empty in
|
||||
entries_of_git ~mtime store repo urls >>= fun entries ->
|
||||
Git.empty ();
|
||||
let t = Tar.out ~level:Ustar entries in
|
||||
let t = Tar_gz.out_gzipped ~level:4 ~mtime:(Int32.of_int mtime) Gz.Unix t in
|
||||
let buf = Buffer.create 1024 in
|
||||
|
@ -944,6 +972,7 @@ stamp: %S
|
|||
Lwt.return_unit
|
||||
| false ->
|
||||
let rec download url mirrors =
|
||||
Logs.info (fun m -> m "downloading %s (%u mirrors)" url (SSet.cardinal mirrors));
|
||||
let retry () =
|
||||
if SSet.is_empty mirrors then begin
|
||||
decr remaining_downloads;
|
||||
|
@ -957,6 +986,7 @@ stamp: %S
|
|||
in
|
||||
let quux, body_init = Disk.init_write disk csums in
|
||||
add_to_active url (Ptime.v (Pclock.now_d_ps ()));
|
||||
if not (K.skip_download ()) then
|
||||
Http_mirage_client.request http_client url (Disk.write_partial disk quux url) body_init >>= function
|
||||
| Ok (resp, r) ->
|
||||
begin match r with
|
||||
|
@ -977,6 +1007,8 @@ stamp: %S
|
|||
| Error me ->
|
||||
add_failed url (Ptime.v (Pclock.now_d_ps ())) (`Mimic me);
|
||||
retry ()
|
||||
else
|
||||
retry ()
|
||||
in
|
||||
download url mirrors)
|
||||
(SM.bindings urls) >>= fun () ->
|
||||
|
|
Loading…
Reference in a new issue
"Upstream caches to use internally (e.g. https://opam.ocaml.org/cache). This makes opam-mirror try the cache(s) before going to the source and mirrors. This does not change the published "archive-mirrors:" value in the /repo endpoint."
actually, if I'm not misguided, we first go to the source, and only thereafter to mirror(s). Is this a good semantics?
And please feel free to push the documentation updates directly to the branch.
Hm it turns out the exact semantics is a bit complicated to explain. You are right that we first to go the source! Then we go to the mirrors or the cache depending on how the URLs sort as they both go into a string set.
I will give this some thought and suggest a change.
Latest commit splits mirror URLs from upstream cache URLs so the mirror URLs are tried first before the upstream caches, and adds a longer description of the option.