revise startup, address urls pointing to same sha256 and support mirrors (upstream and in opam file) #24
3 changed files with 77 additions and 30 deletions
|
@ -1,5 +1,3 @@
|
||||||
|
|
||||||
|
|
||||||
module Hash = struct
|
module Hash = struct
|
||||||
type t = (* OpamHash.kind = *) [ `MD5 | `SHA256 | `SHA512 ]
|
type t = (* OpamHash.kind = *) [ `MD5 | `SHA256 | `SHA512 ]
|
||||||
|
|
||||||
|
|
|
@ -35,12 +35,31 @@ let extract_url_checksum filename items =
|
||||||
List.find_opt
|
List.find_opt
|
||||||
(function { pelem = Variable ({ pelem = "checksum" ; _ }, _); _ } -> true | _ -> false)
|
(function { pelem = Variable ({ pelem = "checksum" ; _ }, _); _ } -> true | _ -> false)
|
||||||
items
|
items
|
||||||
|
and mirrors =
|
||||||
|
List.find_opt
|
||||||
|
(function { pelem = Variable ({ pelem = "mirrors" ; _ }, _); _ } -> true | _ -> false)
|
||||||
|
items
|
||||||
in
|
in
|
||||||
let url =
|
let url =
|
||||||
match url, archive with
|
match url, archive with
|
||||||
| Some { pelem = Variable (_, { pelem = String url ; _ }) ; _ }, None -> Ok url
|
| Some { pelem = Variable (_, { pelem = String url ; _ }) ; _ }, None -> Ok url
|
||||||
| None, Some { pelem = Variable (_, { pelem = String url ; _ }); _ } -> Ok url
|
| None, Some { pelem = Variable (_, { pelem = String url ; _ }); _ } -> Ok url
|
||||||
| _ -> Error (`Msg "neither 'src' nor 'archive' present")
|
| _ -> Error (`Msg "neither 'src' nor 'archive' present")
|
||||||
|
and mirrors = match mirrors with
|
||||||
|
| None -> []
|
||||||
|
| Some { pelem = Variable (_, { pelem = String url ; _ }) ; _ } -> [ url ]
|
||||||
|
| Some { pelem = Variable (_, { pelem = List { pelem = urls ; _ } ; _ }) } ->
|
||||||
|
List.fold_left (fun acc -> function
|
||||||
|
| { pelem = String url ; _ } -> url :: acc
|
||||||
|
|||||||
|
| v ->
|
||||||
|
Logs.err (fun m -> m "bad mirror data (expected a string in the list): %s"
|
||||||
|
(OpamPrinter.FullPos.value v));
|
||||||
|
acc)
|
||||||
|
[] urls
|
||||||
|
| Some v ->
|
||||||
|
Logs.err (fun m -> m "bad mirror data (expected string or string list): %s"
|
||||||
|
(OpamPrinter.FullPos.items [ v ]));
|
||||||
|
[]
|
||||||
in
|
in
|
||||||
let csum, csum_errs =
|
let csum, csum_errs =
|
||||||
match checksum with
|
match checksum with
|
||||||
|
@ -79,7 +98,7 @@ let extract_url_checksum filename items =
|
||||||
| _ -> Error (`Msg "couldn't find or decode 'checksum'"), []
|
| _ -> Error (`Msg "couldn't find or decode 'checksum'"), []
|
||||||
in
|
in
|
||||||
(match url, csum with
|
(match url, csum with
|
||||||
| Ok url, Ok csum -> Ok (url, csum)
|
| Ok url, Ok csum -> Ok (url, csum, mirrors)
|
||||||
reynir
commented
The opam source code does The opam source code does `url :: mirrors` FWIW. Just an observation; not a request for change.
hannes
commented
We keep the url in the map as key, and just carry around a set of other urls for the same artifact. Not sure whether it is worth to have the url as key in the map anymore, though we discovered some opam packages that used the same archive with different checksums... We keep the url in the map as key, and just carry around a set of other urls for the same artifact.
Not sure whether it is worth to have the url as key in the map anymore, though we discovered some opam packages that used the same archive with different checksums...
|
|||||||
| Error _ as e, _
|
| Error _ as e, _
|
||||||
| _, (Error _ as e) -> e), csum_errs
|
| _, (Error _ as e) -> e), csum_errs
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,10 @@ module K = struct
|
||||||
let doc = Arg.info ~doc:"Only check the cache" ["check"] in
|
let doc = Arg.info ~doc:"Only check the cache" ["check"] in
|
||||||
Mirage_runtime.register_arg Arg.(value & flag doc)
|
Mirage_runtime.register_arg Arg.(value & flag doc)
|
||||||
|
|
||||||
|
let upstream_caches =
|
||||||
|
let doc = Arg.info ~doc:"Upstream caches (e.g. https://opam.ocaml.org/cache)" ["upstream-cache"] in
|
||||||
|
Mirage_runtime.register_arg Arg.(value & opt_all string [] doc)
|
||||||
|
|
||||||
let skip_verify_sha256 =
|
let skip_verify_sha256 =
|
||||||
let doc = Arg.info
|
let doc = Arg.info
|
||||||
~doc:"Skip verification of the SHA256 checksums of the cache contents, \
|
~doc:"Skip verification of the SHA256 checksums of the cache contents, \
|
||||||
|
@ -137,20 +141,33 @@ module Make
|
||||||
let path = Mirage_kv.Key.to_string path in
|
let path = Mirage_kv.Key.to_string path in
|
||||||
let url_csums, errs = Opam_file.extract_urls path data in
|
let url_csums, errs = Opam_file.extract_urls path data in
|
||||||
List.iter (fun (`Msg msg) -> add_parse_error path msg) errs;
|
List.iter (fun (`Msg msg) -> add_parse_error path msg) errs;
|
||||||
List.fold_left (fun acc (url, csums) ->
|
let upstream hm =
|
||||||
|
List.fold_left (fun set (hash, hash_value) ->
|
||||||
|
List.fold_left (fun set cache_url ->
|
||||||
|
let url =
|
||||||
|
cache_url ^ "/" ^ Archive_checksum.Hash.to_string hash ^
|
||||||
|
"/" ^ String.sub hash_value 0 2 ^ "/" ^ hash_value
|
||||||
|
in
|
||||||
|
SSet.add url set)
|
||||||
|
set (K.upstream_caches ()))
|
||||||
|
SSet.empty (HM.bindings hm)
|
||||||
|
in
|
||||||
|
List.fold_left (fun acc (url, csums, mirrors) ->
|
||||||
if HM.cardinal csums = 0 then
|
if HM.cardinal csums = 0 then
|
||||||
(add_parse_error path ("no checksums for " ^ url);
|
(add_parse_error path ("no checksums for " ^ url);
|
||||||
acc)
|
acc)
|
||||||
else
|
else
|
||||||
|
let mirrors = SSet.of_list mirrors in
|
||||||
SM.update url (function
|
SM.update url (function
|
||||||
| None -> Some csums
|
| None -> Some (csums, SSet.union mirrors (upstream csums))
|
||||||
| Some csums' ->
|
| Some (csums', mirrors') ->
|
||||||
if HM.for_all (fun h v ->
|
if HM.for_all (fun h v ->
|
||||||
match HM.find_opt h csums with
|
match HM.find_opt h csums with
|
||||||
| None -> true | Some v' -> String.equal v v')
|
| None -> true | Some v' -> String.equal v v')
|
||||||
csums'
|
csums'
|
||||||
then
|
then
|
||||||
Some (HM.union (fun _h v _v' -> Some v) csums csums')
|
Some (HM.union (fun _h v _v' -> Some v) csums csums',
|
||||||
|
SSet.union mirrors mirrors')
|
||||||
else begin
|
else begin
|
||||||
add_parse_error path (Fmt.str
|
add_parse_error path (Fmt.str
|
||||||
"mismatching hashes for %s: %s vs %s"
|
"mismatching hashes for %s: %s vs %s"
|
||||||
|
@ -915,7 +932,7 @@ stamp: %S
|
||||||
remaining_downloads := SM.cardinal urls;
|
remaining_downloads := SM.cardinal urls;
|
||||||
archives := SM.cardinal urls;
|
archives := SM.cardinal urls;
|
||||||
let pool = Lwt_pool.create parallel_downloads (Fun.const Lwt.return_unit) in
|
let pool = Lwt_pool.create parallel_downloads (Fun.const Lwt.return_unit) in
|
||||||
Lwt_list.iter_p (fun (url, csums) ->
|
Lwt_list.iter_p (fun (url, (csums, mirrors)) ->
|
||||||
Lwt_pool.use pool @@ fun () ->
|
Lwt_pool.use pool @@ fun () ->
|
||||||
HM.fold (fun h v r ->
|
HM.fold (fun h v r ->
|
||||||
r >>= function
|
r >>= function
|
||||||
|
@ -926,29 +943,42 @@ stamp: %S
|
||||||
decr remaining_downloads;
|
decr remaining_downloads;
|
||||||
Lwt.return_unit
|
Lwt.return_unit
|
||||||
| false ->
|
| false ->
|
||||||
|
let rec download url mirrors =
|
||||||
|
let retry () =
|
||||||
|
if SSet.is_empty mirrors then begin
|
||||||
|
decr remaining_downloads;
|
||||||
|
Lwt.return_unit
|
||||||
|
end else
|
||||||
|
let elt, mirrors =
|
||||||
|
let e = SSet.min_elt mirrors in
|
||||||
|
e, SSet.remove e mirrors
|
||||||
|
in
|
||||||
|
download elt mirrors
|
||||||
|
in
|
||||||
let quux, body_init = Disk.init_write disk csums in
|
let quux, body_init = Disk.init_write disk csums in
|
||||||
add_to_active url (Ptime.v (Pclock.now_d_ps ()));
|
add_to_active url (Ptime.v (Pclock.now_d_ps ()));
|
||||||
Http_mirage_client.request http_client url (Disk.write_partial disk quux url) body_init >>= function
|
Http_mirage_client.request http_client url (Disk.write_partial disk quux url) body_init >>= function
|
||||||
| Ok (resp, r) ->
|
| Ok (resp, r) ->
|
||||||
decr remaining_downloads;
|
|
||||||
begin match r with
|
begin match r with
|
||||||
| Error `Bad_response ->
|
| Error `Bad_response ->
|
||||||
add_failed url (Ptime.v (Pclock.now_d_ps ()))
|
add_failed url (Ptime.v (Pclock.now_d_ps ()))
|
||||||
(`Bad_response (resp.status, resp.reason));
|
(`Bad_response (resp.status, resp.reason));
|
||||||
Lwt.return_unit
|
retry ()
|
||||||
| Error `Write_error e ->
|
| Error `Write_error e ->
|
||||||
add_failed url (Ptime.v (Pclock.now_d_ps ())) (`Write_error e);
|
add_failed url (Ptime.v (Pclock.now_d_ps ())) (`Write_error e);
|
||||||
Lwt.return_unit
|
retry ()
|
||||||
| Error `Swap e ->
|
| Error `Swap e ->
|
||||||
add_failed url (Ptime.v (Pclock.now_d_ps ())) (`Swap e);
|
add_failed url (Ptime.v (Pclock.now_d_ps ())) (`Swap e);
|
||||||
Lwt.return_unit
|
retry ()
|
||||||
| Ok (digests, body) ->
|
| Ok (digests, body) ->
|
||||||
|
decr remaining_downloads;
|
||||||
Disk.finalize_write disk quux ~url body csums digests
|
Disk.finalize_write disk quux ~url body csums digests
|
||||||
end
|
end
|
||||||
| Error me ->
|
| Error me ->
|
||||||
decr remaining_downloads;
|
|
||||||
add_failed url (Ptime.v (Pclock.now_d_ps ())) (`Mimic me);
|
add_failed url (Ptime.v (Pclock.now_d_ps ())) (`Mimic me);
|
||||||
Lwt.return_unit)
|
retry ()
|
||||||
|
in
|
||||||
|
download url mirrors)
|
||||||
(SM.bindings urls) >>= fun () ->
|
(SM.bindings urls) >>= fun () ->
|
||||||
Disk.update_caches disk >|= fun () ->
|
Disk.update_caches disk >|= fun () ->
|
||||||
Logs.info (fun m -> m "downloading of %d urls done" (SM.cardinal urls))
|
Logs.info (fun m -> m "downloading of %d urls done" (SM.cardinal urls))
|
||||||
|
|
Loading…
Reference in a new issue
I don't think the order is significant, but it's worth noting we reverse the order here.