Compare commits

..

10 commits

4 changed files with 270 additions and 540 deletions

View file

@ -52,7 +52,7 @@ let update_digests { md5; sha256; sha512 } data =
let init_write csums = let init_write csums =
let hash, csum = HM.max_binding csums in let hash, csum = HM.max_binding csums in
(hash, csum), Ok (empty_digests, `Init) (hash, csum), empty_digests
let digests_to_hm digests = let digests_to_hm digests =
HM.empty HM.empty

View file

@ -1,195 +0,0 @@
let archives =
let too_big =
[ "https://github.com/Opsian/opsian-ocaml/releases/download/0.1/0.1.tar.gz" ]
and hash_mismatch = [
"http://cdn.skylable.com/source/libres3-1.3.tar.gz" ;
"http://cdn.skylable.com/source/libres3-0.3.tar.gz" ;
"http://cdn.skylable.com/source/libres3-1.2.tar.gz" ;
"http://cdn.skylable.com/source/libres3-0.9.tar.gz" ;
"http://cdn.skylable.com/source/libres3-0.2.tar.gz" ;
"http://cdn.skylable.com/source/libres3-1.0.tar.gz" ;
"http://cdn.skylable.com/source/libres3-1.1.tar.gz" ;
"http://cdn.skylable.com/source/libres3-0.1.tar.gz" ;
"https://github.com/lemaetech/http-cookie/releases/download/v3.0.0/http-cookie-v3.0.0.tbz" ;
"http://oqamldebug.forge.ocamlcore.org/oqamldebug-0.9.4.tar.gz" ;
"http://oqamldebug.forge.ocamlcore.org/oqamldebug-0.9.2.tar.gz" ;
"http://oqamldebug.forge.ocamlcore.org/oqamldebug-0.9.3.tar.gz" ;
"http://oqamldebug.forge.ocamlcore.org/oqamldebug-0.9.5.tar.gz" ;
"http://oqamldebug.forge.ocamlcore.org/oqamldebug-0.9.1.tar.gz" ;
"https://github.com/OCamlPro/ezjs_fetch/archive/0.1.tar.gz" ;
"http://github.com/OCamlPro/typerex-build/archive/1.99.13-beta.tar.gz" ;
"https://github.com/mirage/dyntype/tarball/dyntype-0.8.5" ;
"https://github.com/mirage/dyntype/tarball/dyntype-0.8.3" ;
"https://github.com/mirage/dyntype/tarball/dyntype-0.8.2" ;
"https://github.com/mirage/dyntype/tarball/dyntype-0.8.4" ;
"https://github.com/mirage/mirage-http-unix/archive/v1.0.0.tar.gz" ;
"http://github.com/OCamlPro/typerex-build/archive/1.99.15-beta.tar.gz" ;
"http://github.com/OCamlPro/typerex-build/archive/1.99.14-beta.tar.gz" ;
"https://github.com/paulpatault/ocamlog/archive/v0.1.tar.gz" ;
"https://github.com/pveber/OCaml-R/archive/pre-nyc-refactoring.tar.gz" ;
"https://github.com/paulpatault/ocamlog/archive/v0.2.tar.gz" ;
"http://github.com/OCamlPro/typerex-build/archive/1.99.16-beta.tar.gz" ;
"https://github.com/FStarLang/kremlin/archive/v0.9.6.0.zip" ;
"https://gitlab.com/dailambda/plebeia/-/archive/2.0.2/plebeia-2.0.2.tar.gz" ;
"https://github.com/mirleft/ocaml-tls/archive/0.5.0.tar.gz" ;
"https://github.com/eth-sri/ELINA/archive/1.3.tar.gz" ;
"https://gitlab.com/trustworthy-refactoring/refactorer/-/archive/0.1/refactorer-0.1.zip" ;
"https://github.com/completium/archetype-lang/archive/1.3.3.tar.gz" ;
"https://github.com/chetmurthy/pa_ppx/archive/0.01.tar.gz" ;
"https://github.com/chambart/ocaml-1/archive/lto.tar.gz" ;
"https://github.com/Kappa-Dev/KaSim/archive/v3.5-250915.tar.gz" ;
"https://github.com/bsansouci/bsb-native/archive/1.9.4.tar.gz";
"https://github.com/sanette/oplot/archive/0.7.tar.gz";
"https://github.com/ulrikstrid/ocaml-cookie/releases/download/0.1.8/session-cookie-lwt-0.1.8.tbz";
]
and bad_request = [
"http://cgit.freedesktop.org/cairo-ocaml/snapshot/cairo-ocaml-1.2.0.tar.gz"
]
and not_found = [
"http://pw374.github.io/distrib/frag/frag-0.1.0.tar.gz" ;
"http://pw374.github.io/distrib/glical/glical-0.0.3.tar.gz" ;
"http://pw374.github.io/distrib/glical/glical-0.0.4.tar.gz" ;
"http://pw374.github.io/distrib/glical/glical-0.0.1.tar.gz" ;
"http://pw374.github.io/distrib/glical/glical-0.0.2.tar.gz" ;
"http://pw374.github.io/distrib/glical/glical-0.0.5.tar.gz" ;
"http://pw374.github.io/distrib/glical/glical-0.0.7.tar.gz" ;
"http://pw374.github.io/distrib/mpp/mpp-0.1.1.tar.gz" ;
"http://pw374.github.io/distrib/mpp/mpp-0.1.0.tar.gz" ;
"http://pw374.github.io/distrib/mpp/mpp-0.1.2.tar.gz" ;
"http://pw374.github.io/distrib/mpp/mpp-0.1.7.tar.gz" ;
"http://pw374.github.io/distrib/mpp/mpp-0.1.3.tar.gz" ;
"http://pw374.github.io/distrib/mpp/mpp-0.1.8.tar.gz" ;
"http://pw374.github.io/distrib/mpp/mpp-0.1.4.tar.gz" ;
"http://pw374.github.io/distrib/mpp/mpp-0.1.5.tar.gz" ;
"http://pw374.github.io/distrib/mpp/mpp-0.2.0.tar.gz" ;
"http://pw374.github.io/distrib/mpp/mpp-0.2.1.tar.gz" ;
"http://pw374.github.io/distrib/mpp/mpp-0.3.0.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-0.3.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-0.4.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-0.5.4.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-0.5.5.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-0.5.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-0.6.0.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-0.6.2.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-0.6.3.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-0.6.4.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-0.6.5.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-0.7.0.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-0.7.1.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-0.7.2.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-0.7.4.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-0.7.3.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-0.7.5.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-0.8.2.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-0.8.0.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-0.8.1.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-0.9.0.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-0.9.1.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-0.9.7.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-1.0.0.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-1.0.1.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-1.1.0.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-1.1.1.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-1.1.2.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-1.2.0.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-1.2.1.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-1.2.2.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-1.2.4.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-1.2.5.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-1.2.6.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-1.2.3.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-1.3.0.tar.gz" ;
"http://zoggy.github.com/ocamldot/ocamldot-1.0.tar.gz" ;
"http://zoggy.github.io/stog/stog-0.4.tar.gz" ;
"http://zoggy.github.io/genet/genet-0.6.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-0.6.1.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-0.9.4.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-0.9.6.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-0.9.5.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-0.9.3.tar.gz" ;
"http://pw374.github.io/distrib/omd/omd-1.1.3.tar.gz" ;
"http://coccinelle.lip6.fr/distrib/coccinelle-1.0.0-rc22.tgz" ;
"http://coccinelle.lip6.fr/distrib/coccinelle-1.0.0-rc21.tgz" ;
"http://coccinelle.lip6.fr/distrib/coccinelle-1.0.0.tgz" ;
"http://proverif.inria.fr/proverif1.96pl1.tar.gz" ;
"http://proverif.inria.fr/proverif1.97.tar.gz" ;
"http://proverif.inria.fr/proverif1.98.tar.gz" ;
"http://proverif.inria.fr/proverif1.97pl3.tar.gz" ;
"http://proverif.inria.fr/proverif1.98pl1.tar.gz" ;
"http://proverif.inria.fr/proverif1.97pl1.tar.gz" ;
"https://github.com/jrochel/eliom/archive/6.4.0.tar.gz" ;
"https://github.com/drjdn/ocaml_lua_parser/archive/1.0.1.tar.gz" ;
"https://github.com/sagotch/To.ml/archive/v1.0.0.tar.gz" ;
"https://zoggy.github.io/ocaml-rdf/ocaml-rdf-0.9.0.tar.gz" ;
"https://github.com/sagotch/To.ml/archive/v2.1.0.tar.gz" ;
"https://github.com/sagotch/To.ml/archive/v2.0.0.tar.gz" ;
"https://zoggy.github.io/ocaml-taglog/taglog-0.1.0.tar.gz" ;
"https://zoggy.github.io/ocaml-taglog/taglog-0.2.0.tar.gz" ;
"https://zoggy.github.io/ocf/ocf-0.3.0.tar.gz" ;
"https://zoggy.github.io/ojs-base/ojs-base-0.1.0.tar.gz" ;
"https://zoggy.github.io/stog/plugins/stog-writing-0.8.0.tar.gz" ;
"https://zoggy.github.io/stog/stog-0.13.0.tar.gz" ;
"https://zoggy.github.io/ocaml-taglog/taglog-0.3.0.tar.gz" ;
"https://zoggy.github.io/ocf/ocf-0.1.0.tar.gz" ;
"https://opam.ocaml.org/cache/md5/24/24b163eb77e6832747dccd6cc8a5d57c" ;
]
and forbidden = [
"https://gforge.inria.fr/frs/download.php/33440/heptagon-1.00.06.tar.gz" ;
"https://gforge.inria.fr/frs/download.php/file/33677/dose3-3.2.2.tar.gz" ;
"https://gforge.inria.fr/frs/download.php/file/34920/javalib-2.3.1.tar.bz2" ;
"https://gforge.inria.fr/frs/download.php/file/36092/javalib-2.3.2.tar.bz2" ;
"https://gforge.inria.fr/frs/download.php/file/36093/sawja-1.5.2.tar.bz2" ;
"https://gforge.inria.fr/frs/download.php/file/37154/javalib-2.3.4.tar.bz2" ;
"https://gforge.inria.fr/frs/download.php/file/37403/sawja-1.5.3.tar.bz2" ;
"https://gforge.inria.fr/frs/download.php/file/36307/javalib-2.3.3.tar.bz2" ;
"https://gforge.inria.fr/frs/download.php/file/37655/javalib-2.3.5.tar.bz2" ;
"https://gforge.inria.fr/frs/download.php/file/37656/sawja-1.5.4.tar.bz2" ;
"https://gforge.inria.fr/frs/download.php/file/34921/sawja-1.5.1.tar.bz2" ;
]
and three_o_o = [
"https://github.com/Gbury/dolmen/archive/v0.4.tar.gz" ;
"https://github.com/Stevendeo/Pilat/archive/1.3.tar.gz" ;
"https://github.com/OCamlPro/ocp-indent/archive/1.5.tar.gz" ;
"https://github.com/backtracking/combine/archive/release-0.6.zip" ;
"https://github.com/cakeplus/pa_comprehension/archive/0.4.tar.gz" ;
"https://github.com/cakeplus/mparser/archive/1.0.tar.gz" ;
"https://github.com/chenyukang/rubytt/archive/v0.1.tar.gz" ;
"https://github.com/cakeplus/pa_where/archive/0.4.tar.gz" ;
"https://github.com/metaocaml/ber-metaocaml/archive/ber-n102.tar.gz" ;
"https://github.com/cakeplus/pa_solution/archive/0.5.tar.gz" ;
"https://github.com/cakeplus/mparser/archive/1.2.1.tar.gz" ;
"https://github.com/cakeplus/pa_solution/archive/0.7.tar.gz" ;
"https://github.com/cakeplus/pa_solution/archive/0.6.tar.gz" ;
"https://github.com/mirage/mirage-tcpip/archive/v2.8.1.tar.gz" ;
"https://github.com/modlfo/pla/archive/v1.4.tar.gz" ;
"https://github.com/murmour/pa_qualified/archive/0.5.tar.gz" ;
"https://github.com/ocaml-ppx/ocamlformat/archive/v0.2.tar.gz" ;
"https://github.com/murmour/pa_qualified/archive/0.6.tar.gz" ;
"https://github.com/ocaml-ppx/ocamlformat/archive/support.0.2.tar.gz" ;
"https://github.com/ocaml/oloop/archive/0.1.2.tar.gz" ;
"https://github.com/cakeplus/mparser/archive/1.0.1.tar.gz" ;
"https://github.com/cakeplus/mparser/archive/1.1.tar.gz" ;
"https://github.com/savonet/ocaml-ffmpeg/archive/v1.0.0-rc1.tar.gz" ;
"https://github.com/ocaml/opam2web/archive/2.0.tar.gz" ;
"https://github.com/savonet/ocaml-ffmpeg/archive/v1.0.0.tar.gz" ;
]
and five_o_three = [ "https://gitlab.com/gasche/build_path_prefix_map/repository/0.2/archive.tar.gz" ]
and is_ftp = [ "ftp://ftp.netbsd.org/pub/pkgsrc/distfiles/wyrd-1.4.6.tar.gz" ]
and connect_fails = [
"http://godi.0ok.org/godi-backup/shcaml-0.1.3.tar.gz" ;
"http://www.first.in-berlin.de/software/tools/apalogretrieve/apalogretrieve-0-9-6_4.tgz" ;
"https://cavale.enseeiht.fr/osdp/osdp-0.5.4.tgz" ;
"https://cavale.enseeiht.fr/osdp/osdp-0.6.0.tgz" ;
"https://cavale.enseeiht.fr/osdp/osdp-1.0.0.tgz" ;
]
in
too_big @ hash_mismatch @ bad_request @ not_found @ forbidden @ three_o_o @ five_o_three @ is_ftp @ connect_fails

View file

@ -13,17 +13,22 @@ let hex_of_string s =
let decode_digest filename str = let decode_digest filename str =
let hex h s = let hex h s =
match hex_of_string s with match hex_of_string s with
| Ok d -> Some (h, d) | Ok d -> Ok (h, d)
| Error `Msg msg -> | Error `Msg msg as e ->
Log.warn (fun m -> m "%s invalid hex (%s) %s" filename msg s); None Log.warn (fun m -> m "%s invalid hex (%s) %s" filename msg s);
e
in in
match String.split_on_char '=' str with match String.split_on_char '=' str with
| [ data ] -> hex `MD5 data | [ data ] -> hex `MD5 data
| [ "md5" ; data ] -> hex `MD5 data | [ "md5" ; data ] -> hex `MD5 data
| [ "sha256" ; data ] -> hex `SHA256 data | [ "sha256" ; data ] -> hex `SHA256 data
| [ "sha512" ; data ] -> hex `SHA512 data | [ "sha512" ; data ] -> hex `SHA512 data
| [ hash ; _ ] -> Log.warn (fun m -> m "%s unknown hash %s" filename hash); None | [ hash ; _ ] ->
| _ -> Log.warn (fun m -> m "%s unexpected hash format %S" filename str); None Log.warn (fun m -> m "%s unknown hash %s" filename hash);
Error (`Msg ("unknown hash " ^ hash))
| _ ->
Log.warn (fun m -> m "%s unexpected hash format %S" filename str);
Error (`Msg ("unexpected hash format " ^ str))
let extract_url_checksum filename items = let extract_url_checksum filename items =
let open OpamParserTypes.FullPos in let open OpamParserTypes.FullPos in
@ -42,64 +47,73 @@ let extract_url_checksum filename items =
in in
let url = let url =
match url, archive with match url, archive with
| Some { pelem = Variable (_, { pelem = String url ; _ }) ; _ }, None -> Some url | Some { pelem = Variable (_, { pelem = String url ; _ }) ; _ }, None -> Ok url
| None, Some { pelem = Variable (_, { pelem = String url ; _ }); _ } -> Some url | None, Some { pelem = Variable (_, { pelem = String url ; _ }); _ } -> Ok url
| _ -> | _ ->
Log.warn (fun m -> m "%s neither src nor archive present" filename); None Log.warn (fun m -> m "%s neither src nor archive present" filename);
Error (`Msg "neither 'src' nor 'archive' present")
in in
let csum = let csum, csum_errs =
match checksum with match checksum with
| Some { pelem = Variable (_, { pelem = List { pelem = csums ; _ } ; _ }); _ } -> | Some { pelem = Variable (_, { pelem = List { pelem = csums ; _ } ; _ }); _ } ->
let csums = let csums, errs =
List.fold_left (fun acc -> List.fold_left (fun (csums, errs) ->
function function
| { pelem = String csum ; _ } -> | { pelem = String csum ; _ } ->
begin match decode_digest filename csum with begin match decode_digest filename csum with
| None -> acc | Error e -> csums, e :: errs
| Some (h, v) -> | Ok (h, v) ->
HM.update h (function HM.update h (function
| None -> Some v | None -> Some v
| Some v' when String.equal v v' -> None | Some v' when String.equal v v' -> None
| Some v' -> | Some v' ->
Log.warn (fun m -> m "for %s, hash %s, multiple keys are present: %s %s" Log.warn (fun m -> m "for %s, hash %s, multiple keys are present: %s %s"
(Option.value ~default:"NONE" url) (hash_to_string h) (Ohex.encode v) (Ohex.encode v')); (Result.value ~default:"NONE" url) (hash_to_string h) (Ohex.encode v) (Ohex.encode v'));
None) None)
acc csums, errs
end end
| _ -> acc) HM.empty csums | v ->
csums, `Msg (Fmt.str "bad checksum data: %s" (OpamPrinter.FullPos.value v)) :: errs)
(HM.empty, []) csums
in in
Some csums if HM.is_empty csums then
match errs with
| hd :: tl -> Error hd, tl
| [] -> Error (`Msg "empty checksums"), []
else
Ok csums, errs
| Some { pelem = Variable (_, { pelem = String csum ; _ }) ; _ } -> | Some { pelem = Variable (_, { pelem = String csum ; _ }) ; _ } ->
begin match decode_digest filename csum with begin match decode_digest filename csum with
| None -> None | Error _ as e -> e, []
| Some (h, v) -> Some (HM.singleton h v) | Ok (h, v) -> Ok (HM.singleton h v), []
end end
| _ -> | _ ->
Log.warn (fun m -> m "couldn't decode checksum in %s" filename); Log.warn (fun m -> m "couldn't decode checksum in %s" filename);
None Error (`Msg "couldn't find or decode 'checksum'"), []
in in
match url, csum with (match url, csum with
| Some url, Some cs -> Some (url, cs) | Ok url, Ok csum -> Ok (url, csum)
| _ -> None | Error _ as e, _
| _, (Error _ as e) -> e), csum_errs
let extract_checksums_and_urls filename opam = let extract_checksums_and_urls filename opam =
let open OpamParserTypes.FullPos in let open OpamParserTypes.FullPos in
List.fold_left (fun acc -> List.fold_left (fun (csum_urls, errs) ->
function function
| { pelem = Section ({ section_kind = { pelem = "url" ; _ } ; section_items = { pelem = items ; _ } ; _ }) ; _} -> | { pelem = Section ({ section_kind = { pelem = "url" ; _ } ; section_items = { pelem = items ; _ } ; _ }) ; _} ->
begin match extract_url_checksum filename items with begin match extract_url_checksum filename items with
| None -> acc | Error `Msg msg, errs' -> csum_urls, `Msg ("url: " ^ msg) :: errs' @ errs
| Some url -> url :: acc | Ok url, errs' -> url :: csum_urls, errs' @ errs
end end
| { pelem = Section ({ section_kind = { pelem = "extra-source" ; _ } ; section_name = Some { pelem ; _ } ; section_items = { pelem = items ; _ }; _ }) ; _} -> | { pelem = Section ({ section_kind = { pelem = "extra-source" ; _ } ; section_name = Some { pelem ; _ } ; section_items = { pelem = items ; _ }; _ }) ; _} ->
begin begin
Log.debug (fun m -> m "extracting for extra-source %s in %s" filename pelem); Log.debug (fun m -> m "extracting for extra-source %s in %s" filename pelem);
match extract_url_checksum filename items with match extract_url_checksum filename items with
| None -> acc | Error `Msg msg, errs' -> csum_urls, `Msg ("extra-source " ^ pelem ^ " " ^ msg) :: errs' @ errs
| Some url -> url :: acc | Ok url, errs' -> url :: csum_urls, errs' @ errs
end end
| _ -> acc) | _ -> csum_urls, errs)
[] opam.file_contents ([], []) opam.file_contents
let extract_urls filename str = let extract_urls filename str =
(* in an opam file, there may be: (* in an opam file, there may be:
@ -121,6 +135,6 @@ let extract_urls filename str =
in in
if unavailable then if unavailable then
(Log.debug (fun m -> m "%s is marked unavailable, skipping" filename); (Log.debug (fun m -> m "%s is marked unavailable, skipping" filename);
[]) [], [])
else else
extract_checksums_and_urls filename opamfile extract_checksums_and_urls filename opamfile

View file

@ -101,85 +101,89 @@ module Make
hash_to_string h ^ "=" ^ Ohex.encode v ^ "\n" ^ acc) hash_to_string h ^ "=" ^ Ohex.encode v ^ "\n" ^ acc)
hm "" hm ""
module Git = struct let parse_errors = ref SM.empty
let find_contents store =
let rec go store path acc =
Store.list store path >>= function
| Error e ->
Logs.err (fun m -> m "error %a while listing %a"
Store.pp_error e Mirage_kv.Key.pp path);
Lwt.return acc
| Ok steps ->
Lwt_list.fold_left_s (fun acc (step, _) ->
Store.exists store step >>= function
| Error e ->
Logs.err (fun m -> m "error %a for exists %a" Store.pp_error e
Mirage_kv.Key.pp step);
Lwt.return acc
| Ok None ->
Logs.warn (fun m -> m "no typ for %a" Mirage_kv.Key.pp step);
Lwt.return acc
| Ok Some `Value -> Lwt.return (step :: acc)
| Ok Some `Dictionary -> go store step acc) acc steps
in
go store Mirage_kv.Key.empty []
let find_urls store = let add_parse_error filename error =
find_contents store >>= fun paths -> parse_errors := SM.add filename error !parse_errors
let opam_paths =
List.filter (fun p -> Mirage_kv.Key.basename p = "opam") paths module Git = struct
let contents store =
let explore = ref [ Mirage_kv.Key.empty ] in
let more () =
let rec go () =
match !explore with
| [] -> Lwt.return None
| step :: tl ->
explore := tl;
Store.exists store step >>= function
| Error e ->
Logs.err (fun m -> m "error %a for exists %a" Store.pp_error e
Mirage_kv.Key.pp step);
go ()
| Ok None ->
Logs.warn (fun m -> m "no typ for %a" Mirage_kv.Key.pp step);
go ()
| Ok Some `Value -> Lwt.return (Some step)
| Ok Some `Dictionary ->
Store.list store step >>= function
| Error e ->
Logs.err (fun m -> m "error %a while listing %a"
Store.pp_error e Mirage_kv.Key.pp step);
go ()
| Ok steps ->
explore := List.map fst steps @ !explore;
go ()
in
go ()
in in
Lwt_list.fold_left_s (fun acc path -> Lwt_stream.from more
Store.get store path >|= function
| Ok data -> let find_urls acc path data =
(* TODO report parser errors *) if Mirage_kv.Key.basename path = "opam" then
(try let path = Mirage_kv.Key.to_string path in
let url_csums = Opam_file.extract_urls (Mirage_kv.Key.to_string path) data in let url_csums, errs = Opam_file.extract_urls path data in
List.fold_left (fun acc (url, csums) -> List.iter (fun (`Msg msg) -> add_parse_error path msg) errs;
if HM.cardinal csums = 0 then List.fold_left (fun acc (url, csums) ->
(Logs.warn (fun m -> m "no checksums for %s, ignoring" url); acc) if HM.cardinal csums = 0 then
else (Logs.warn (fun m -> m "no checksums for %s, ignoring" url);
SM.update url (function add_parse_error path ("no checksums for " ^ url);
| None -> Some csums
| Some csums' ->
if HM.for_all (fun h v ->
match HM.find_opt h csums with
| None -> true | Some v' -> String.equal v v')
csums'
then
Some (HM.union (fun _h v _v' -> Some v) csums csums')
else begin
Logs.warn (fun m -> m "mismatching hashes for %s: %s vs %s"
url (hm_to_s csums') (hm_to_s csums));
None
end) acc) acc url_csums
with _ ->
Logs.warn (fun m -> m "some error in %a, ignoring" Mirage_kv.Key.pp path);
acc) acc)
| Error e -> Logs.warn (fun m -> m "Store.get: %a" Store.pp_error e); acc) else
SM.empty opam_paths SM.update url (function
| None -> Some csums
| Some csums' ->
if HM.for_all (fun h v ->
match HM.find_opt h csums with
| None -> true | Some v' -> String.equal v v')
csums'
then
Some (HM.union (fun _h v _v' -> Some v) csums csums')
else begin
Logs.warn (fun m -> m "mismatching hashes for %s: %s vs %s"
url (hm_to_s csums') (hm_to_s csums));
add_parse_error path (Fmt.str
"mismatching hashes for %s: %s vs %s"
url (hm_to_s csums') (hm_to_s csums));
None
end) acc) acc url_csums
else
acc
end end
let active_downloads = ref SM.empty let active_downloads = ref SM.empty
let add_to_active url ts = let add_to_active url ts =
active_downloads := SM.add url (ts, 0, "unknown size") !active_downloads active_downloads := SM.add url (ts, 0) !active_downloads
let remove_active url = let remove_active url =
active_downloads := SM.remove url !active_downloads active_downloads := SM.remove url !active_downloads
let active_length url written length =
match SM.find_opt url !active_downloads with
| None -> ()
| Some (ts, written', _) ->
active_downloads := SM.add url (ts, written + written', length)
!active_downloads
let active_add_bytes url written = let active_add_bytes url written =
match SM.find_opt url !active_downloads with match SM.find_opt url !active_downloads with
| None -> () | None -> ()
| Some (ts, written', l) -> | Some (ts, written') ->
active_downloads := SM.add url (ts, written + written', l) active_downloads := SM.add url (ts, written + written')
!active_downloads !active_downloads
let failed_downloads = ref SM.empty let failed_downloads = ref SM.empty
@ -188,6 +192,36 @@ module Make
remove_active url; remove_active url;
failed_downloads := SM.add url (ts, reason) !failed_downloads failed_downloads := SM.add url (ts, reason) !failed_downloads
let pp_failed ppf = function
| `Write_error e ->
Fmt.pf ppf "write error: %a" KV.pp_write_error e
| `Swap e ->
Fmt.pf ppf "swap error: %a" Swap.pp_error e
| `Bad_checksum (hash, computed, expected) ->
Fmt.pf ppf "%s checksum: computed %s expected %s"
(hash_to_string hash)
(Ohex.encode computed)
(Ohex.encode expected)
| `Bad_response (status, reason) ->
Fmt.pf ppf "bad response: %a %s" H2.Status.pp_hum status reason
| `Mimic me ->
Fmt.pf ppf "mimic: %a" Mimic.pp_error me
let compare_failed a b = match a, b with
| `Write_error _, `Write_error _ -> 0
| `Write_error _, _ -> 1
| _, `Write_error _ -> -1
| `Swap _, `Swap _ -> 0
| `Swap _, _ -> 1
| _, `Swap _ -> -1
| `Bad_checksum _, `Bad_checksum _ -> 0
| `Bad_checksum _, _ -> 1
| _, `Bad_checksum _ -> -1
| `Bad_response _, `Bad_response _ -> 0
| `Bad_response _, _ -> 1
| _, `Bad_response _ -> -1
| `Mimic _, `Mimic _ -> 0
module Disk = struct module Disk = struct
type t = { type t = {
mutable md5s : string SM.t ; mutable md5s : string SM.t ;
@ -198,10 +232,6 @@ module Make
dev_swap : Swap.t ; dev_swap : Swap.t ;
} }
let pending = Mirage_kv.Key.v "pending"
let to_delete = Mirage_kv.Key.v "to-delete"
let empty dev dev_md5s dev_sha512s dev_swap = { md5s = SM.empty ; sha512s = SM.empty ; dev; dev_md5s; dev_sha512s ; dev_swap } let empty dev dev_md5s dev_sha512s dev_swap = { md5s = SM.empty ; sha512s = SM.empty ; dev; dev_md5s; dev_sha512s ; dev_swap }
let marshal_sm (sm : string SM.t) = let marshal_sm (sm : string SM.t) =
@ -267,118 +297,21 @@ module Make
in in
read_more a Optint.Int63.zero read_more a Optint.Int63.zero
(* let init_write t csums =
module HM_running = struct let quux, csums = Archive_checksum.init_write csums in
let swap = Swap.empty t.dev_swap in
let empty h = quux, Ok (csums, swap)
let module H = (val Mirage_crypto.Hash.module_of h) in
(* We need MD5, SHA256 and SHA512. [h] is likely one of the
aforementioned and in that case we don't compute the same hash twice
*)
HM.empty
|> HM.add `MD5 Mirage_crypto.Hash.MD5.empty
|> HM.add `SHA256 Mirage_crypto.Hash.SHA256.empty
|> HM.add `SHA512 Mirage_crypto.Hash.SHA512.empty
|> HM.add h H.empty
let feed t data =
HM.map (fun h v ->
let module H = (val Mirage_crypto.Hash.module_of h) in
H.feed v data)
t
let get =
HM.map (fun h v ->
let module H = (val Mirage_crypto.Hash.module_of h) in
H.get v)
end
*)
let content_length_of_string s =
match Int64.of_string s with
| len when len >= 0L -> `Fixed len
| _ | exception _ -> `Bad_response
let body_length headers =
match H2.Headers.get_multi headers "content-length" with
| [] -> `Unknown
| [ x ] -> content_length_of_string x
| hd :: tl ->
(* if there are multiple content-length headers we require them all to be
* exactly equal. *)
if List.for_all (String.equal hd) tl then
content_length_of_string hd
else
`Bad_response
let body_length (response : Http_mirage_client.response) =
if response.status <> `OK then
`Bad_response
else
body_length response.headers
let pending_key (hash, csum) =
match hash with
| `SHA512 ->
(* We can't use hex because the filename would become too long for tar *)
Mirage_kv.Key.(pending / hash_to_string hash / Base64.encode_string ~alphabet:Base64.uri_safe_alphabet ~pad:false csum)
| _ ->
Mirage_kv.Key.(pending / hash_to_string hash / Ohex.encode csum)
let to_delete_key (hash, csum) =
let rand = "random" in (* FIXME: generate random string *)
let encoded_csum =
match hash with
| `SHA512 ->
(* We can't use hex because the filename would become too long for tar *)
Base64.encode_string ~alphabet:Base64.uri_safe_alphabet ~pad:false csum
| _ ->
Ohex.encode csum
in
Mirage_kv.Key.(to_delete / hash_to_string hash / (encoded_csum ^ "." ^ rand))
let write_partial t (hash, csum) url = let write_partial t (hash, csum) url =
(* XXX: we may be in trouble if different hash functions are used for the same archive *) (* XXX: we may be in trouble if different hash functions are used for the same archive *)
let key = pending_key (hash, csum) in
let ( >>>= ) = Lwt_result.bind in let ( >>>= ) = Lwt_result.bind in
fun response r data -> fun response r data ->
Lwt.return r >>>= fun (digests, acc) -> Lwt.return r >>>= fun (digests, swap) ->
let digests = Archive_checksum.update_digests digests data in let digests = Archive_checksum.update_digests digests data in
match acc with active_add_bytes url (String.length data);
| `Init -> Swap.append swap data >|= function
begin match body_length response with | Ok () -> Ok (digests, swap)
| `Bad_response -> Lwt.return (Error `Bad_response) | Error swap_err -> Error (`Swap swap_err)
| `Fixed size ->
KV.allocate t.dev key (Optint.Int63.of_int64 size)
|> Lwt_result.map_error (fun e -> `Write_error e)
>>>= fun () ->
KV.set_partial t.dev key ~offset:Optint.Int63.zero data
|> Lwt_result.map_error (fun e -> `Write_error e) >>>= fun () ->
let len = String.length data in
let offset = Optint.Int63.of_int len in
active_length url len (Int64.to_string size ^ " bytes");
Lwt.return_ok (digests, `Fixed_body (size, offset))
| `Unknown ->
active_add_bytes url (String.length data);
let h = Swap.empty t.dev_swap in
Swap.append h data >|= function
| Ok () -> Ok (digests, `Unknown h)
| Error swap_err -> Error (`Swap swap_err)
end
| `Fixed_body (size, offset) ->
KV.set_partial t.dev key ~offset data
|> Lwt_result.map_error (fun e -> `Write_error e) >>>= fun () ->
let len = String.length data in
let offset = Optint.Int63.(add offset (of_int len)) in
active_add_bytes url len;
Lwt.return_ok (digests, `Fixed_body (size, offset))
| `Unknown h ->
active_add_bytes url (String.length data);
Swap.append h data >|= function
| Ok () -> Ok (digests, `Unknown h)
| Error swap_err -> Error (`Swap swap_err)
let check_csums_digests csums digests = let check_csums_digests csums digests =
let csums' = Archive_checksum.digests_to_hm digests in let csums' = Archive_checksum.digests_to_hm digests in
@ -389,6 +322,12 @@ module Make
common_bindings common_bindings
let set_from_handle dev dest h = let set_from_handle dev dest h =
(* TODO: we need a function in tar which
(a) takes a path
(b) takes a function that reads (from the swap) and writes (to the tar)
(c) once the function is finished, it writes the tar header
-> this would allow us to avoid the rename stuff below
*)
let size = Optint.Int63.of_int64 (Swap.size h) in let size = Optint.Int63.of_int64 (Swap.size h) in
KV.allocate dev dest size >>= fun r -> KV.allocate dev dest size >>= fun r ->
let rec loop offset = let rec loop offset =
@ -413,34 +352,20 @@ module Make
| Error e -> | Error e ->
Lwt.return (Error (`Write_error e)) Lwt.return (Error (`Write_error e))
let finalize_write t (hash, csum) ~url (body : [ `Unknown of Swap.handle | `Fixed_body of int64 * Optint.Int63.t | `Init ]) csums digests = let finalize_write t (hash, csum) ~url swap csums digests =
let sizes_match, body_size_in_header = if check_csums_digests csums digests then
match body with
| `Fixed_body (reported, actual) -> Optint.Int63.(equal (of_int64 reported) actual), true
| `Unknown _ -> true, false
| `Init -> assert false
in
let source = pending_key (hash, csum) in
if check_csums_digests csums digests && sizes_match then
let sha256 = Ohex.encode Digestif.SHA256.(to_raw_string (get digests.sha256)) let sha256 = Ohex.encode Digestif.SHA256.(to_raw_string (get digests.sha256))
and md5 = Ohex.encode Digestif.MD5.(to_raw_string (get digests.md5)) and md5 = Ohex.encode Digestif.MD5.(to_raw_string (get digests.md5))
and sha512 = Ohex.encode Digestif.SHA512.(to_raw_string (get digests.sha512)) in and sha512 = Ohex.encode Digestif.SHA512.(to_raw_string (get digests.sha512)) in
let dest = Mirage_kv.Key.v sha256 in let dest = Mirage_kv.Key.v sha256 in
begin match body with Logs.info (fun m -> m "downloaded %s, now writing" url);
| `Unknown h -> let temp = Mirage_kv.Key.(v "pending" // dest) in
Logs.info (fun m -> m "downloaded %s, now writing" url); Lwt_result.bind
Lwt_result.bind (Lwt.finalize (fun () -> set_from_handle t.dev temp swap)
(Lwt.finalize (fun () -> set_from_handle t.dev source h) (fun () -> Swap.free swap))
(fun () -> Swap.free h)) (fun () -> KV.rename t.dev ~source:temp ~dest
(fun () -> |> Lwt_result.map_error (fun e -> `Write_error e))
KV.rename t.dev ~source ~dest >|= function
|> Lwt_result.map_error (fun e -> `Write_error e))
| `Fixed_body (_reported_size, _actual_size) ->
Logs.info (fun m -> m "downloaded %s" url);
KV.rename t.dev ~source ~dest
|> Lwt_result.map_error (fun e -> `Write_error e)
| `Init -> assert false
end >|= function
| Ok () -> | Ok () ->
remove_active url; remove_active url;
t.md5s <- SM.add md5 sha256 t.md5s; t.md5s <- SM.add md5 sha256 t.md5s;
@ -451,46 +376,17 @@ module Make
| `Swap e -> Swap.pp_error ppf e | `Swap e -> Swap.pp_error ppf e
in in
Logs.err (fun m -> m "Write failure for %s: %a" url pp_error e); Logs.err (fun m -> m "Write failure for %s: %a" url pp_error e);
add_failed url (Ptime.v (Pclock.now_d_ps ())) match e with
(Fmt.str "Write failure for %s: %a" url pp_error e) | `Write_error e -> add_failed url (Ptime.v (Pclock.now_d_ps ())) (`Write_error e)
| `Swap e -> add_failed url (Ptime.v (Pclock.now_d_ps ())) (`Swap e)
else begin else begin
(if sizes_match then begin add_failed url (Ptime.v (Pclock.now_d_ps ()))
add_failed url (Ptime.v (Pclock.now_d_ps ())) (`Bad_checksum (hash, Archive_checksum.get digests hash, csum));
(Fmt.str "Bad checksum %s:%s: computed %s expected %s" url Logs.err (fun m -> m "Bad checksum %s:%s: computed %s expected %s" url
(hash_to_string hash) (hash_to_string hash)
(Ohex.encode (Archive_checksum.get digests hash)) (Ohex.encode (Archive_checksum.get digests hash))
(Ohex.encode csum)); (Ohex.encode csum));
Logs.err (fun m -> m "Bad checksum %s:%s: computed %s expected %s" url Lwt.return_unit
(hash_to_string hash)
(Ohex.encode (Archive_checksum.get digests hash))
(Ohex.encode csum))
end else match body with
| `Fixed_body (reported, actual) ->
add_failed url (Ptime.v (Pclock.now_d_ps ()))
(Fmt.str "Size mismatch %s: received %a bytes expected %Lu bytes"
url Optint.Int63.pp actual reported);
Logs.err (fun m -> m "Size mismatch %s: received %a bytes expected %Lu bytes"
url Optint.Int63.pp actual reported)
| `Unknown _ -> assert false
| `Init -> assert false);
if body_size_in_header then
(* if the checksums mismatch we want to delete the file. We are only
able to do so if it was the latest created file, so we expect and
error. Ideally, we want to match for `Append_only or other errors *)
KV.remove t.dev source >>= function
| Ok () -> Lwt.return_unit
| Error e ->
(* we failed to delete the file so we mark it for deletion *)
let dest = to_delete_key (hash, csum) in
Logs.warn (fun m -> m "Failed to remove %a: %a. Moving it to %a"
Mirage_kv.Key.pp source KV.pp_write_error e Mirage_kv.Key.pp dest);
KV.rename t.dev ~source ~dest >|= function
| Ok () -> ()
| Error e ->
Logs.warn (fun m -> m "Error renaming file %a -> %a: %a"
Mirage_kv.Key.pp source Mirage_kv.Key.pp dest KV.pp_write_error e)
else
Lwt.return_unit
end end
(* on disk, we use a flat file system where the filename is the sha256 of the data *) (* on disk, we use a flat file system where the filename is the sha256 of the data *)
@ -516,12 +412,6 @@ module Make
let md5s = SSet.of_list (List.map snd (SM.bindings t.md5s)) let md5s = SSet.of_list (List.map snd (SM.bindings t.md5s))
and sha512s = SSet.of_list (List.map snd (SM.bindings t.sha512s)) in and sha512s = SSet.of_list (List.map snd (SM.bindings t.sha512s)) in
let idx = ref 1 in let idx = ref 1 in
(* XXX: should we do something about pending downloads?? *)
let entries =
List.filter (fun (p, _) ->
not (Mirage_kv.Key.equal p pending || Mirage_kv.Key.equal p to_delete))
entries
in
Lwt_list.iter_s (fun (path, typ) -> Lwt_list.iter_s (fun (path, typ) ->
if !idx mod 10 = 0 then Gc.full_major () ; if !idx mod 10 = 0 then Gc.full_major () ;
match typ with match typ with
@ -530,19 +420,7 @@ module Make
Lwt.return_unit Lwt.return_unit
| `Value -> | `Value ->
let open Digestif in let open Digestif in
let sha256_final = let md5_final =
if verify_sha256 then
let f s =
let digest = SHA256.(to_raw_string (get s)) in
if not (String.equal (Mirage_kv.Key.basename path) (Ohex.encode digest)) then
Logs.err (fun m -> m "corrupt SHA256 data for %a, \
computed %s (should remove)"
Mirage_kv.Key.pp path (Ohex.encode digest))
in
Some f
else
None
and md5_final =
if not (SSet.mem (Mirage_kv.Key.basename path) md5s) then if not (SSet.mem (Mirage_kv.Key.basename path) md5s) then
let f s = let f s =
let digest = MD5.(to_raw_string (get s)) in let digest = MD5.(to_raw_string (get s)) in
@ -561,26 +439,53 @@ module Make
else else
None None
in in
match sha256_final, md5_final, sha512_final with let sha256_final =
| None, None, None -> Lwt.return_unit let need_to_compute = md5_final <> None || sha512_final <> None || verify_sha256 in
| _ -> if need_to_compute then
let f s =
let digest = SHA256.(to_raw_string (get s)) in
if not (String.equal (Mirage_kv.Key.basename path) (Ohex.encode digest)) then
begin
Logs.err (fun m -> m "corrupt SHA256 data for %a, \
computed %s (will rename)"
Mirage_kv.Key.pp path (Ohex.encode digest));
false
end else true
in
Some f
else
None
in
match sha256_final with
| None -> Lwt.return_unit
| Some f ->
read_chunked t `SHA256 path read_chunked t `SHA256 path
(fun (sha256, md5, sha512) data -> (fun (sha256, md5, sha512) data ->
Lwt.return Lwt.return
(Option.map (fun t -> SHA256.feed_string t data) sha256, (SHA256.feed_string sha256 data,
Option.map (fun t -> MD5.feed_string t data) md5, Option.map (fun t -> MD5.feed_string t data) md5,
Option.map (fun t -> SHA512.feed_string t data) sha512)) Option.map (fun t -> SHA512.feed_string t data) sha512))
(Option.map (fun _ -> SHA256.empty) sha256_final, (SHA256.empty,
Option.map (fun _ -> MD5.empty) md5_final, Option.map (fun _ -> MD5.empty) md5_final,
Option.map (fun _ -> SHA512.empty) sha512_final) >|= function Option.map (fun _ -> SHA512.empty) sha512_final) >>= function
| Error e -> | Error e ->
Logs.err (fun m -> m "error %a of %a while computing digests" Logs.err (fun m -> m "error %a of %a while computing digests"
KV.pp_error e Mirage_kv.Key.pp path) KV.pp_error e Mirage_kv.Key.pp path);
Lwt.return_unit
| Ok (sha256, md5, sha512) -> | Ok (sha256, md5, sha512) ->
Option.iter (fun f -> f (Option.get sha256)) sha256_final; if not (f sha256) then
Option.iter (fun f -> f (Option.get md5)) md5_final; (* bad sha256! *)
Option.iter (fun f -> f (Option.get sha512)) sha512_final; KV.rename t.dev ~source:path ~dest:(Mirage_kv.Key.(v "delete" // path)) >|= function
Logs.info (fun m -> m "added %a" Mirage_kv.Key.pp path)) | Ok () -> ()
| Error we ->
Logs.err (fun m -> m "error %a while renaming %a" KV.pp_write_error we
Mirage_kv.Key.pp path)
else begin
Option.iter (fun f -> f (Option.get md5)) md5_final;
Option.iter (fun f -> f (Option.get sha512)) sha512_final;
Logs.info (fun m -> m "added %a" Mirage_kv.Key.pp path);
Lwt.return_unit
end)
entries >>= fun () -> entries >>= fun () ->
update_caches t >|= fun () -> update_caches t >|= fun () ->
t t
@ -661,22 +566,23 @@ module Make
then Tar.High (High.inj (Lwt.return_ok None)) then Tar.High (High.inj (Lwt.return_ok None))
else begin closed := true; Tar.High (High.inj (Lwt.return_ok (Some data))) end else begin closed := true; Tar.High (High.inj (Lwt.return_ok (Some data))) end
let entries_of_git ~mtime store repo = let entries_of_git ~mtime store repo urls =
Git.find_contents store >>= fun paths -> let entries = Git.contents store in
let entries = Lwt_stream.of_list paths in
let to_entry path = let to_entry path =
Store.get store path >|= function Store.get store path >|= function
| Ok data -> | Ok data ->
let data = let data =
if Mirage_kv.Key.(equal path (v "repo")) if Mirage_kv.Key.(equal path (v "repo"))
then repo else data in then repo else data
in
let file_mode = 0o644 let file_mode = 0o644
and mod_time = Int64.of_int mtime and mod_time = Int64.of_int mtime
and user_id = 0 and user_id = 0
and group_id = 0 and group_id = 0
and size = String.length data in and size = String.length data in
let hdr = Tar.Header.make ~file_mode ~mod_time ~user_id ~group_id let hdr = Tar.Header.make ~file_mode ~mod_time ~user_id ~group_id
(Mirage_kv.Key.to_string path) (Int64.of_int size) in (Mirage_kv.Key.to_string path) (Int64.of_int size) in
urls := Git.find_urls !urls path data;
Some (Some Tar.Header.Ustar, hdr, once data) Some (Some Tar.Header.Ustar, hdr, once data)
| Error _ -> None in | Error _ -> None in
let entries = Lwt_stream.filter_map_s to_entry entries in let entries = Lwt_stream.filter_map_s to_entry entries in
@ -685,12 +591,13 @@ module Make
let of_git repo store = let of_git repo store =
let now = Ptime.v (Pclock.now_d_ps ()) in let now = Ptime.v (Pclock.now_d_ps ()) in
let mtime = Option.value ~default:0 Ptime.(Span.to_int_s (to_span now)) in let mtime = Option.value ~default:0 Ptime.(Span.to_int_s (to_span now)) in
entries_of_git ~mtime store repo >>= fun entries -> let urls = ref SM.empty in
entries_of_git ~mtime store repo urls >>= fun entries ->
let t = Tar.out ~level:Ustar entries in let t = Tar.out ~level:Ustar entries in
let t = Tar_gz.out_gzipped ~level:4 ~mtime:(Int32.of_int mtime) Gz.Unix t in let t = Tar_gz.out_gzipped ~level:4 ~mtime:(Int32.of_int mtime) Gz.Unix t in
let buf = Buffer.create 1024 in let buf = Buffer.create 1024 in
to_buffer buf t >|= function to_buffer buf t >|= function
| Ok () -> Buffer.contents buf | Ok () -> Buffer.contents buf, !urls
| Error (`Msg msg) -> failwith msg | Error (`Msg msg) -> failwith msg
end end
@ -743,8 +650,8 @@ stamp: %S
commit_id git_kv >>= fun commit_id -> commit_id git_kv >>= fun commit_id ->
modified git_kv >>= fun modified -> modified git_kv >>= fun modified ->
let repo = repo remote commit_id in let repo = repo remote commit_id in
Tarball.of_git repo git_kv >|= fun index -> Tarball.of_git repo git_kv >|= fun (index, urls) ->
{ commit_id ; modified ; repo ; index } { commit_id ; modified ; repo ; index }, urls
let update_lock = Lwt_mutex.create () let update_lock = Lwt_mutex.create ()
@ -757,18 +664,18 @@ stamp: %S
Lwt.return None Lwt.return None
| Ok [] -> | Ok [] ->
Logs.info (fun m -> m "git changes are empty"); Logs.info (fun m -> m "git changes are empty");
Lwt.return (Some []) Lwt.return (Some ([], SM.empty))
| Ok changes -> | Ok changes ->
commit_id git_kv >>= fun commit_id -> commit_id git_kv >>= fun commit_id ->
modified git_kv >>= fun modified -> modified git_kv >>= fun modified ->
Logs.info (fun m -> m "git: %s" commit_id); Logs.info (fun m -> m "git: %s" commit_id);
let repo = repo remote commit_id in let repo = repo remote commit_id in
Tarball.of_git repo git_kv >|= fun index -> Tarball.of_git repo git_kv >|= fun (index, urls) ->
t.commit_id <- commit_id ; t.commit_id <- commit_id ;
t.modified <- modified ; t.modified <- modified ;
t.repo <- repo ; t.repo <- repo ;
t.index <- index; t.index <- index;
Some changes) Some (changes, urls))
let status disk = let status disk =
(* report status: (* report status:
@ -781,27 +688,41 @@ stamp: %S
(SM.cardinal disk.Disk.md5s) (SM.cardinal disk.Disk.md5s)
(KV.free disk.Disk.dev) (KV.free disk.Disk.dev)
in in
let sort_by_ts a b = Ptime.compare a b in
let active_downloads = let active_downloads =
let header = "<h2>Active downloads</h2><ul>" in let header = "<h2>Active downloads</h2><ul>" in
let content = let content =
SM.fold (fun url (ts, bytes_written, length_or_unknown) acc -> SM.bindings !active_downloads |>
("<li>" ^ Ptime.to_rfc3339 ?tz_offset_s:None ts ^ ": " ^ url ^ " " ^ string_of_int bytes_written ^ " bytes written to disk, " ^ length_or_unknown ^ "</li>") List.sort (fun (_, (a, _)) (_, (b, _)) -> sort_by_ts a b) |>
^ acc) List.map (fun (url, (ts, bytes_written)) ->
!active_downloads "" "<li>" ^ Ptime.to_rfc3339 ?tz_offset_s:None ts ^ ": " ^ url ^ " " ^ string_of_int bytes_written ^ " bytes written to swap</li>")
in in
header ^ content ^ "</ul>" header ^ String.concat "" content ^ "</ul>"
and failed_downloads = and failed_downloads =
let header = "<h2>Failed downloads</h2><ul>" in let header = "<h2>Failed downloads</h2><ul>" in
let content = let content =
SM.fold (fun url (ts, reason) acc -> SM.bindings !failed_downloads |>
("<li>" ^ Ptime.to_rfc3339 ?tz_offset_s:None ts ^ ": " ^ url ^ " " ^ reason ^ "</li>") List.sort (fun (_, (a, reasona)) (_, (b, reasonb)) ->
^ acc) match compare_failed reasona reasonb with
!failed_downloads "" | 0 -> sort_by_ts a b
| n -> n) |>
List.map (fun (url, (ts, reason)) ->
Fmt.str "<li>%s: %s error %a"
(Ptime.to_rfc3339 ?tz_offset_s:None ts) url pp_failed reason)
in in
header ^ content ^ "</ul>" header ^ String.concat "" content ^ "</ul>"
and parse_errors =
let header = "<h2>Parse errors</h2><ul>" in
let content =
SM.bindings !parse_errors |>
List.sort (fun (a, _) (b, _) -> String.compare a b) |>
List.map (fun (filename, reason) ->
"<li>" ^ filename ^ ": " ^ reason ^ "</li>")
in
header ^ String.concat "" content ^ "</ul>"
in in
"<html><head><title>Opam-mirror status page</title></head><body><h1>Opam mirror status</h1><div>" "<html><head><title>Opam-mirror status page</title></head><body><h1>Opam mirror status</h1><div>"
^ String.concat "</div><div>" [ archive_stats ; active_downloads ; failed_downloads ] ^ String.concat "</div><div>" [ archive_stats ; active_downloads ; failed_downloads ; parse_errors ]
^ "</div></body></html>" ^ "</div></body></html>"
let not_modified request (modified, etag) = let not_modified request (modified, etag) =
@ -951,17 +872,12 @@ stamp: %S
end end
let bad_archives = SSet.of_list Bad.archives let download_archives parallel_downloads disk http_client urls =
let download_archives parallel_downloads disk http_client store =
(* FIXME: handle resuming partial downloads *) (* FIXME: handle resuming partial downloads *)
Git.find_urls store >>= fun urls ->
let urls = SM.filter (fun k _ -> not (SSet.mem k bad_archives)) urls in
let pool = Lwt_pool.create parallel_downloads (Fun.const Lwt.return_unit) in let pool = Lwt_pool.create parallel_downloads (Fun.const Lwt.return_unit) in
let idx = ref 0 in let idx = ref 0 in
Lwt_list.iter_p (fun (url, csums) -> Lwt_list.iter_p (fun (url, csums) ->
Lwt_pool.use pool @@ fun () -> Lwt_pool.use pool @@ fun () ->
(* FIXME: check pending and to-delete *)
HM.fold (fun h v r -> HM.fold (fun h v r ->
r >>= function r >>= function
| true -> Disk.exists disk h (hex_to_key v) | true -> Disk.exists disk h (hex_to_key v)
@ -974,7 +890,7 @@ stamp: %S
incr idx; incr idx;
if !idx mod 10 = 0 then Gc.full_major () ; if !idx mod 10 = 0 then Gc.full_major () ;
Logs.info (fun m -> m "downloading %s" url); Logs.info (fun m -> m "downloading %s" url);
let quux, body_init = Archive_checksum.init_write csums in let quux, body_init = Disk.init_write disk csums in
add_to_active url (Ptime.v (Pclock.now_d_ps ())); add_to_active url (Ptime.v (Pclock.now_d_ps ()));
Http_mirage_client.request http_client url (Disk.write_partial disk quux url) body_init >>= function Http_mirage_client.request http_client url (Disk.write_partial disk quux url) body_init >>= function
| Ok (resp, r) -> | Ok (resp, r) ->
@ -983,30 +899,25 @@ stamp: %S
Logs.warn (fun m -> m "%s: %a (reason %s)" Logs.warn (fun m -> m "%s: %a (reason %s)"
url H2.Status.pp_hum resp.status resp.reason); url H2.Status.pp_hum resp.status resp.reason);
add_failed url (Ptime.v (Pclock.now_d_ps ())) add_failed url (Ptime.v (Pclock.now_d_ps ()))
(Fmt.str "%a %s" H2.Status.pp_hum resp.status resp.reason); (`Bad_response (resp.status, resp.reason));
Lwt.return_unit Lwt.return_unit
| Error `Write_error e -> | Error `Write_error e ->
Logs.err (fun m -> m "%s: write error %a %a" Logs.err (fun m -> m "%s: write error %a"
url url
Mirage_kv.Key.pp (Disk.pending_key quux)
KV.pp_write_error e); KV.pp_write_error e);
add_failed url (Ptime.v (Pclock.now_d_ps ())) add_failed url (Ptime.v (Pclock.now_d_ps ())) (`Write_error e);
(Fmt.str "write error: %a" KV.pp_write_error e);
Lwt.return_unit Lwt.return_unit
| Error `Swap e -> | Error `Swap e ->
Logs.err (fun m -> m "%s: swap error %a %a" Logs.err (fun m -> m "%s: swap error %a"
url url
Mirage_kv.Key.pp (Disk.pending_key quux)
Swap.pp_error e); Swap.pp_error e);
add_failed url (Ptime.v (Pclock.now_d_ps ())) add_failed url (Ptime.v (Pclock.now_d_ps ())) (`Swap e);
(Fmt.str "swap error: %a" Swap.pp_error e);
Lwt.return_unit Lwt.return_unit
| Ok (digests, body) -> | Ok (digests, body) ->
Disk.finalize_write disk quux ~url body csums digests Disk.finalize_write disk quux ~url body csums digests
end end
| Error me -> | Error me ->
add_failed url (Ptime.v (Pclock.now_d_ps ())) add_failed url (Ptime.v (Pclock.now_d_ps ())) (`Mimic me);
(Fmt.str "mimic error: %a" Mimic.pp_error me);
Lwt.return_unit) Lwt.return_unit)
(SM.bindings urls) >>= fun () -> (SM.bindings urls) >>= fun () ->
Disk.update_caches disk >|= fun () -> Disk.update_caches disk >|= fun () ->
@ -1062,14 +973,14 @@ stamp: %S
Logs.info (fun m -> m "Done initializing git state!"); Logs.info (fun m -> m "Done initializing git state!");
Serve.commit_id git_kv >>= fun commit_id -> Serve.commit_id git_kv >>= fun commit_id ->
Logs.info (fun m -> m "git: %s" commit_id); Logs.info (fun m -> m "git: %s" commit_id);
Serve.create remote git_kv >>= fun serve -> Serve.create remote git_kv >>= fun (serve, urls) ->
Paf.init ~port:(K.port ()) (Stack.tcp stack) >>= fun t -> Paf.init ~port:(K.port ()) (Stack.tcp stack) >>= fun t ->
let update () = let update () =
Serve.update_git ~remote serve git_kv >>= function Serve.update_git ~remote serve git_kv >>= function
| None | Some [] -> Lwt.return_unit | None | Some ([], _) -> Lwt.return_unit
| Some _changes -> | Some (_changes, urls) ->
dump_git git_dump git_kv >>= fun () -> dump_git git_dump git_kv >>= fun () ->
download_archives (K.parallel_downloads ()) disk http_ctx git_kv download_archives (K.parallel_downloads ()) disk http_ctx urls
in in
let service = let service =
Paf.http_service Paf.http_service
@ -1085,7 +996,7 @@ stamp: %S
go () go ()
in in
go ()); go ());
download_archives (K.parallel_downloads ()) disk http_ctx git_kv >>= fun () -> download_archives (K.parallel_downloads ()) disk http_ctx urls >>= fun () ->
(th >|= fun _v -> ()) (th >|= fun _v -> ())
let start block _time _pclock stack git_ctx http_ctx = let start block _time _pclock stack git_ctx http_ctx =