record and preserve opam file parsing issues

This commit is contained in:
Hannes Mehnert 2024-11-04 18:27:27 +01:00
parent a47193f147
commit 9c50538877
2 changed files with 97 additions and 67 deletions

View file

@ -13,17 +13,22 @@ let hex_of_string s =
let decode_digest filename str =
let hex h s =
match hex_of_string s with
| Ok d -> Some (h, d)
| Error `Msg msg ->
Log.warn (fun m -> m "%s invalid hex (%s) %s" filename msg s); None
| Ok d -> Ok (h, d)
| Error `Msg msg as e ->
Log.warn (fun m -> m "%s invalid hex (%s) %s" filename msg s);
e
in
match String.split_on_char '=' str with
| [ data ] -> hex `MD5 data
| [ "md5" ; data ] -> hex `MD5 data
| [ "sha256" ; data ] -> hex `SHA256 data
| [ "sha512" ; data ] -> hex `SHA512 data
| [ hash ; _ ] -> Log.warn (fun m -> m "%s unknown hash %s" filename hash); None
| _ -> Log.warn (fun m -> m "%s unexpected hash format %S" filename str); None
| [ hash ; _ ] ->
Log.warn (fun m -> m "%s unknown hash %s" filename hash);
Error (`Msg ("unknown hash " ^ hash))
| _ ->
Log.warn (fun m -> m "%s unexpected hash format %S" filename str);
Error (`Msg ("unexpected hash format " ^ str))
let extract_url_checksum filename items =
let open OpamParserTypes.FullPos in
@ -42,64 +47,73 @@ let extract_url_checksum filename items =
in
let url =
match url, archive with
| Some { pelem = Variable (_, { pelem = String url ; _ }) ; _ }, None -> Some url
| None, Some { pelem = Variable (_, { pelem = String url ; _ }); _ } -> Some url
| Some { pelem = Variable (_, { pelem = String url ; _ }) ; _ }, None -> Ok url
| None, Some { pelem = Variable (_, { pelem = String url ; _ }); _ } -> Ok url
| _ ->
Log.warn (fun m -> m "%s neither src nor archive present" filename); None
Log.warn (fun m -> m "%s neither src nor archive present" filename);
Error (`Msg "neither 'src' nor 'archive' present")
in
let csum =
let csum, csum_errs =
match checksum with
| Some { pelem = Variable (_, { pelem = List { pelem = csums ; _ } ; _ }); _ } ->
let csums =
List.fold_left (fun acc ->
let csums, errs =
List.fold_left (fun (csums, errs) ->
function
| { pelem = String csum ; _ } ->
begin match decode_digest filename csum with
| None -> acc
| Some (h, v) ->
| Error e -> csums, e :: errs
| Ok (h, v) ->
HM.update h (function
| None -> Some v
| Some v' when String.equal v v' -> None
| Some v' ->
Log.warn (fun m -> m "for %s, hash %s, multiple keys are present: %s %s"
(Option.value ~default:"NONE" url) (hash_to_string h) (Ohex.encode v) (Ohex.encode v'));
(Result.value ~default:"NONE" url) (hash_to_string h) (Ohex.encode v) (Ohex.encode v'));
None)
acc
csums, errs
end
| _ -> acc) HM.empty csums
| v ->
csums, `Msg (Fmt.str "bad checksum data: %s" (OpamPrinter.FullPos.value v)) :: errs)
(HM.empty, []) csums
in
Some csums
if HM.is_empty csums then
match errs with
| hd :: tl -> Error hd, tl
| [] -> Error (`Msg "empty checksums"), []
else
Ok csums, errs
| Some { pelem = Variable (_, { pelem = String csum ; _ }) ; _ } ->
begin match decode_digest filename csum with
| None -> None
| Some (h, v) -> Some (HM.singleton h v)
| Error _ as e -> e, []
| Ok (h, v) -> Ok (HM.singleton h v), []
end
| _ ->
Log.warn (fun m -> m "couldn't decode checksum in %s" filename);
None
Error (`Msg "couldn't find or decode 'checksum'"), []
in
match url, csum with
| Some url, Some cs -> Some (url, cs)
| _ -> None
(match url, csum with
| Ok url, Ok csum -> Ok (url, csum)
| Error _ as e, _
| _, (Error _ as e) -> e), csum_errs
let extract_checksums_and_urls filename opam =
let open OpamParserTypes.FullPos in
List.fold_left (fun acc ->
List.fold_left (fun (csum_urls, errs) ->
function
| { pelem = Section ({ section_kind = { pelem = "url" ; _ } ; section_items = { pelem = items ; _ } ; _ }) ; _} ->
begin match extract_url_checksum filename items with
| None -> acc
| Some url -> url :: acc
| Error `Msg msg, errs' -> csum_urls, `Msg ("url: " ^ msg) :: errs' @ errs
| Ok url, errs' -> url :: csum_urls, errs' @ errs
end
| { pelem = Section ({ section_kind = { pelem = "extra-source" ; _ } ; section_name = Some { pelem ; _ } ; section_items = { pelem = items ; _ }; _ }) ; _} ->
begin
Log.debug (fun m -> m "extracting for extra-source %s in %s" filename pelem);
match extract_url_checksum filename items with
| None -> acc
| Some url -> url :: acc
| Error `Msg msg, errs' -> csum_urls, `Msg ("extra-source " ^ pelem ^ " " ^ msg) :: errs' @ errs
| Ok url, errs' -> url :: csum_urls, errs' @ errs
end
| _ -> acc)
[] opam.file_contents
| _ -> csum_urls, errs)
([], []) opam.file_contents
let extract_urls filename str =
(* in an opam file, there may be:
@ -121,6 +135,6 @@ let extract_urls filename str =
in
if unavailable then
(Log.debug (fun m -> m "%s is marked unavailable, skipping" filename);
[])
[], [])
else
extract_checksums_and_urls filename opamfile

View file

@ -101,6 +101,11 @@ module Make
hash_to_string h ^ "=" ^ Ohex.encode v ^ "\n" ^ acc)
hm ""
let parse_errors = ref SM.empty
let add_parse_error filename error =
parse_errors := SM.add filename error !parse_errors
module Git = struct
let contents store =
let explore = ref [ Mirage_kv.Key.empty ] in
@ -135,31 +140,32 @@ module Make
let find_urls acc path data =
if Mirage_kv.Key.basename path = "opam" then
(* TODO: parser errors are logged (should be reported to status page) *)
(try
let url_csums = Opam_file.extract_urls (Mirage_kv.Key.to_string path) data in
List.fold_left (fun acc (url, csums) ->
if HM.cardinal csums = 0 then
(Logs.warn (fun m -> m "no checksums for %s, ignoring" url); acc)
else
SM.update url (function
| None -> Some csums
| Some csums' ->
if HM.for_all (fun h v ->
match HM.find_opt h csums with
| None -> true | Some v' -> String.equal v v')
csums'
then
Some (HM.union (fun _h v _v' -> Some v) csums csums')
else begin
Logs.warn (fun m -> m "mismatching hashes for %s: %s vs %s"
url (hm_to_s csums') (hm_to_s csums));
None
end) acc) acc url_csums
with exn ->
Logs.warn (fun m -> m "some error in %a, ignoring %s"
Mirage_kv.Key.pp path (Printexc.to_string exn));
acc)
let path = Mirage_kv.Key.to_string path in
let url_csums, errs = Opam_file.extract_urls path data in
List.iter (fun (`Msg msg) -> add_parse_error path msg) errs;
List.fold_left (fun acc (url, csums) ->
if HM.cardinal csums = 0 then
(Logs.warn (fun m -> m "no checksums for %s, ignoring" url);
add_parse_error path ("no checksums for " ^ url);
acc)
else
SM.update url (function
| None -> Some csums
| Some csums' ->
if HM.for_all (fun h v ->
match HM.find_opt h csums with
| None -> true | Some v' -> String.equal v v')
csums'
then
Some (HM.union (fun _h v _v' -> Some v) csums csums')
else begin
Logs.warn (fun m -> m "mismatching hashes for %s: %s vs %s"
url (hm_to_s csums') (hm_to_s csums));
add_parse_error path (Fmt.str
"mismatching hashes for %s: %s vs %s"
url (hm_to_s csums') (hm_to_s csums));
None
end) acc) acc url_csums
else
acc
@ -654,27 +660,37 @@ stamp: %S
(SM.cardinal disk.Disk.md5s)
(KV.free disk.Disk.dev)
in
let sort_by_ts a b = Ptime.compare a b in
let active_downloads =
let header = "<h2>Active downloads</h2><ul>" in
let content =
SM.fold (fun url (ts, bytes_written) acc ->
("<li>" ^ Ptime.to_rfc3339 ?tz_offset_s:None ts ^ ": " ^ url ^ " " ^ string_of_int bytes_written ^ " bytes written to disk</li>")
^ acc)
!active_downloads ""
SM.bindings !active_downloads |>
List.sort (fun (_, (a, _)) (_, (b, _)) -> sort_by_ts a b) |>
List.map (fun (url, (ts, bytes_written)) ->
"<li>" ^ Ptime.to_rfc3339 ?tz_offset_s:None ts ^ ": " ^ url ^ " " ^ string_of_int bytes_written ^ " bytes written to swap</li>")
in
header ^ content ^ "</ul>"
header ^ String.concat "" content ^ "</ul>"
and failed_downloads =
let header = "<h2>Failed downloads</h2><ul>" in
let content =
SM.fold (fun url (ts, reason) acc ->
("<li>" ^ Ptime.to_rfc3339 ?tz_offset_s:None ts ^ ": " ^ url ^ " " ^ reason ^ "</li>")
^ acc)
!failed_downloads ""
SM.bindings !failed_downloads |>
List.sort (fun (_, (a, _)) (_, (b, _)) -> sort_by_ts a b) |>
List.map (fun (url, (ts, reason)) ->
"<li>" ^ Ptime.to_rfc3339 ?tz_offset_s:None ts ^ ": " ^ url ^ " " ^ reason ^ "</li>")
in
header ^ content ^ "</ul>"
header ^ String.concat "" content ^ "</ul>"
and parse_errors =
let header = "<h2>Parse errors</h2><ul>" in
let content =
SM.bindings !parse_errors |>
List.sort (fun (a, _) (b, _) -> String.compare a b) |>
List.map (fun (filename, reason) ->
"<li>" ^ filename ^ ": " ^ reason ^ "</li>")
in
header ^ String.concat "" content ^ "</ul>"
in
"<html><head><title>Opam-mirror status page</title></head><body><h1>Opam mirror status</h1><div>"
^ String.concat "</div><div>" [ archive_stats ; active_downloads ; failed_downloads ]
^ String.concat "</div><div>" [ archive_stats ; active_downloads ; failed_downloads ; parse_errors ]
^ "</div></body></html>"
let not_modified request (modified, etag) =