record and preserve opam file parsing issues

This commit is contained in:
Hannes Mehnert 2024-11-04 18:27:27 +01:00
parent a47193f147
commit 9c50538877
2 changed files with 97 additions and 67 deletions

View file

@ -13,17 +13,22 @@ let hex_of_string s =
let decode_digest filename str = let decode_digest filename str =
let hex h s = let hex h s =
match hex_of_string s with match hex_of_string s with
| Ok d -> Some (h, d) | Ok d -> Ok (h, d)
| Error `Msg msg -> | Error `Msg msg as e ->
Log.warn (fun m -> m "%s invalid hex (%s) %s" filename msg s); None Log.warn (fun m -> m "%s invalid hex (%s) %s" filename msg s);
e
in in
match String.split_on_char '=' str with match String.split_on_char '=' str with
| [ data ] -> hex `MD5 data | [ data ] -> hex `MD5 data
| [ "md5" ; data ] -> hex `MD5 data | [ "md5" ; data ] -> hex `MD5 data
| [ "sha256" ; data ] -> hex `SHA256 data | [ "sha256" ; data ] -> hex `SHA256 data
| [ "sha512" ; data ] -> hex `SHA512 data | [ "sha512" ; data ] -> hex `SHA512 data
| [ hash ; _ ] -> Log.warn (fun m -> m "%s unknown hash %s" filename hash); None | [ hash ; _ ] ->
| _ -> Log.warn (fun m -> m "%s unexpected hash format %S" filename str); None Log.warn (fun m -> m "%s unknown hash %s" filename hash);
Error (`Msg ("unknown hash " ^ hash))
| _ ->
Log.warn (fun m -> m "%s unexpected hash format %S" filename str);
Error (`Msg ("unexpected hash format " ^ str))
let extract_url_checksum filename items = let extract_url_checksum filename items =
let open OpamParserTypes.FullPos in let open OpamParserTypes.FullPos in
@ -42,64 +47,73 @@ let extract_url_checksum filename items =
in in
let url = let url =
match url, archive with match url, archive with
| Some { pelem = Variable (_, { pelem = String url ; _ }) ; _ }, None -> Some url | Some { pelem = Variable (_, { pelem = String url ; _ }) ; _ }, None -> Ok url
| None, Some { pelem = Variable (_, { pelem = String url ; _ }); _ } -> Some url | None, Some { pelem = Variable (_, { pelem = String url ; _ }); _ } -> Ok url
| _ -> | _ ->
Log.warn (fun m -> m "%s neither src nor archive present" filename); None Log.warn (fun m -> m "%s neither src nor archive present" filename);
Error (`Msg "neither 'src' nor 'archive' present")
in in
let csum = let csum, csum_errs =
match checksum with match checksum with
| Some { pelem = Variable (_, { pelem = List { pelem = csums ; _ } ; _ }); _ } -> | Some { pelem = Variable (_, { pelem = List { pelem = csums ; _ } ; _ }); _ } ->
let csums = let csums, errs =
List.fold_left (fun acc -> List.fold_left (fun (csums, errs) ->
function function
| { pelem = String csum ; _ } -> | { pelem = String csum ; _ } ->
begin match decode_digest filename csum with begin match decode_digest filename csum with
| None -> acc | Error e -> csums, e :: errs
| Some (h, v) -> | Ok (h, v) ->
HM.update h (function HM.update h (function
| None -> Some v | None -> Some v
| Some v' when String.equal v v' -> None | Some v' when String.equal v v' -> None
| Some v' -> | Some v' ->
Log.warn (fun m -> m "for %s, hash %s, multiple keys are present: %s %s" Log.warn (fun m -> m "for %s, hash %s, multiple keys are present: %s %s"
(Option.value ~default:"NONE" url) (hash_to_string h) (Ohex.encode v) (Ohex.encode v')); (Result.value ~default:"NONE" url) (hash_to_string h) (Ohex.encode v) (Ohex.encode v'));
None) None)
acc csums, errs
end end
| _ -> acc) HM.empty csums | v ->
csums, `Msg (Fmt.str "bad checksum data: %s" (OpamPrinter.FullPos.value v)) :: errs)
(HM.empty, []) csums
in in
Some csums if HM.is_empty csums then
match errs with
| hd :: tl -> Error hd, tl
| [] -> Error (`Msg "empty checksums"), []
else
Ok csums, errs
| Some { pelem = Variable (_, { pelem = String csum ; _ }) ; _ } -> | Some { pelem = Variable (_, { pelem = String csum ; _ }) ; _ } ->
begin match decode_digest filename csum with begin match decode_digest filename csum with
| None -> None | Error _ as e -> e, []
| Some (h, v) -> Some (HM.singleton h v) | Ok (h, v) -> Ok (HM.singleton h v), []
end end
| _ -> | _ ->
Log.warn (fun m -> m "couldn't decode checksum in %s" filename); Log.warn (fun m -> m "couldn't decode checksum in %s" filename);
None Error (`Msg "couldn't find or decode 'checksum'"), []
in in
match url, csum with (match url, csum with
| Some url, Some cs -> Some (url, cs) | Ok url, Ok csum -> Ok (url, csum)
| _ -> None | Error _ as e, _
| _, (Error _ as e) -> e), csum_errs
let extract_checksums_and_urls filename opam = let extract_checksums_and_urls filename opam =
let open OpamParserTypes.FullPos in let open OpamParserTypes.FullPos in
List.fold_left (fun acc -> List.fold_left (fun (csum_urls, errs) ->
function function
| { pelem = Section ({ section_kind = { pelem = "url" ; _ } ; section_items = { pelem = items ; _ } ; _ }) ; _} -> | { pelem = Section ({ section_kind = { pelem = "url" ; _ } ; section_items = { pelem = items ; _ } ; _ }) ; _} ->
begin match extract_url_checksum filename items with begin match extract_url_checksum filename items with
| None -> acc | Error `Msg msg, errs' -> csum_urls, `Msg ("url: " ^ msg) :: errs' @ errs
| Some url -> url :: acc | Ok url, errs' -> url :: csum_urls, errs' @ errs
end end
| { pelem = Section ({ section_kind = { pelem = "extra-source" ; _ } ; section_name = Some { pelem ; _ } ; section_items = { pelem = items ; _ }; _ }) ; _} -> | { pelem = Section ({ section_kind = { pelem = "extra-source" ; _ } ; section_name = Some { pelem ; _ } ; section_items = { pelem = items ; _ }; _ }) ; _} ->
begin begin
Log.debug (fun m -> m "extracting for extra-source %s in %s" filename pelem); Log.debug (fun m -> m "extracting for extra-source %s in %s" filename pelem);
match extract_url_checksum filename items with match extract_url_checksum filename items with
| None -> acc | Error `Msg msg, errs' -> csum_urls, `Msg ("extra-source " ^ pelem ^ " " ^ msg) :: errs' @ errs
| Some url -> url :: acc | Ok url, errs' -> url :: csum_urls, errs' @ errs
end end
| _ -> acc) | _ -> csum_urls, errs)
[] opam.file_contents ([], []) opam.file_contents
let extract_urls filename str = let extract_urls filename str =
(* in an opam file, there may be: (* in an opam file, there may be:
@ -121,6 +135,6 @@ let extract_urls filename str =
in in
if unavailable then if unavailable then
(Log.debug (fun m -> m "%s is marked unavailable, skipping" filename); (Log.debug (fun m -> m "%s is marked unavailable, skipping" filename);
[]) [], [])
else else
extract_checksums_and_urls filename opamfile extract_checksums_and_urls filename opamfile

View file

@ -101,6 +101,11 @@ module Make
hash_to_string h ^ "=" ^ Ohex.encode v ^ "\n" ^ acc) hash_to_string h ^ "=" ^ Ohex.encode v ^ "\n" ^ acc)
hm "" hm ""
let parse_errors = ref SM.empty
let add_parse_error filename error =
parse_errors := SM.add filename error !parse_errors
module Git = struct module Git = struct
let contents store = let contents store =
let explore = ref [ Mirage_kv.Key.empty ] in let explore = ref [ Mirage_kv.Key.empty ] in
@ -135,12 +140,14 @@ module Make
let find_urls acc path data = let find_urls acc path data =
if Mirage_kv.Key.basename path = "opam" then if Mirage_kv.Key.basename path = "opam" then
(* TODO: parser errors are logged (should be reported to status page) *) let path = Mirage_kv.Key.to_string path in
(try let url_csums, errs = Opam_file.extract_urls path data in
let url_csums = Opam_file.extract_urls (Mirage_kv.Key.to_string path) data in List.iter (fun (`Msg msg) -> add_parse_error path msg) errs;
List.fold_left (fun acc (url, csums) -> List.fold_left (fun acc (url, csums) ->
if HM.cardinal csums = 0 then if HM.cardinal csums = 0 then
(Logs.warn (fun m -> m "no checksums for %s, ignoring" url); acc) (Logs.warn (fun m -> m "no checksums for %s, ignoring" url);
add_parse_error path ("no checksums for " ^ url);
acc)
else else
SM.update url (function SM.update url (function
| None -> Some csums | None -> Some csums
@ -154,12 +161,11 @@ module Make
else begin else begin
Logs.warn (fun m -> m "mismatching hashes for %s: %s vs %s" Logs.warn (fun m -> m "mismatching hashes for %s: %s vs %s"
url (hm_to_s csums') (hm_to_s csums)); url (hm_to_s csums') (hm_to_s csums));
add_parse_error path (Fmt.str
"mismatching hashes for %s: %s vs %s"
url (hm_to_s csums') (hm_to_s csums));
None None
end) acc) acc url_csums end) acc) acc url_csums
with exn ->
Logs.warn (fun m -> m "some error in %a, ignoring %s"
Mirage_kv.Key.pp path (Printexc.to_string exn));
acc)
else else
acc acc
@ -654,27 +660,37 @@ stamp: %S
(SM.cardinal disk.Disk.md5s) (SM.cardinal disk.Disk.md5s)
(KV.free disk.Disk.dev) (KV.free disk.Disk.dev)
in in
let sort_by_ts a b = Ptime.compare a b in
let active_downloads = let active_downloads =
let header = "<h2>Active downloads</h2><ul>" in let header = "<h2>Active downloads</h2><ul>" in
let content = let content =
SM.fold (fun url (ts, bytes_written) acc -> SM.bindings !active_downloads |>
("<li>" ^ Ptime.to_rfc3339 ?tz_offset_s:None ts ^ ": " ^ url ^ " " ^ string_of_int bytes_written ^ " bytes written to disk</li>") List.sort (fun (_, (a, _)) (_, (b, _)) -> sort_by_ts a b) |>
^ acc) List.map (fun (url, (ts, bytes_written)) ->
!active_downloads "" "<li>" ^ Ptime.to_rfc3339 ?tz_offset_s:None ts ^ ": " ^ url ^ " " ^ string_of_int bytes_written ^ " bytes written to swap</li>")
in in
header ^ content ^ "</ul>" header ^ String.concat "" content ^ "</ul>"
and failed_downloads = and failed_downloads =
let header = "<h2>Failed downloads</h2><ul>" in let header = "<h2>Failed downloads</h2><ul>" in
let content = let content =
SM.fold (fun url (ts, reason) acc -> SM.bindings !failed_downloads |>
("<li>" ^ Ptime.to_rfc3339 ?tz_offset_s:None ts ^ ": " ^ url ^ " " ^ reason ^ "</li>") List.sort (fun (_, (a, _)) (_, (b, _)) -> sort_by_ts a b) |>
^ acc) List.map (fun (url, (ts, reason)) ->
!failed_downloads "" "<li>" ^ Ptime.to_rfc3339 ?tz_offset_s:None ts ^ ": " ^ url ^ " " ^ reason ^ "</li>")
in in
header ^ content ^ "</ul>" header ^ String.concat "" content ^ "</ul>"
and parse_errors =
let header = "<h2>Parse errors</h2><ul>" in
let content =
SM.bindings !parse_errors |>
List.sort (fun (a, _) (b, _) -> String.compare a b) |>
List.map (fun (filename, reason) ->
"<li>" ^ filename ^ ": " ^ reason ^ "</li>")
in
header ^ String.concat "" content ^ "</ul>"
in in
"<html><head><title>Opam-mirror status page</title></head><body><h1>Opam mirror status</h1><div>" "<html><head><title>Opam-mirror status page</title></head><body><h1>Opam mirror status</h1><div>"
^ String.concat "</div><div>" [ archive_stats ; active_downloads ; failed_downloads ] ^ String.concat "</div><div>" [ archive_stats ; active_downloads ; failed_downloads ; parse_errors ]
^ "</div></body></html>" ^ "</div></body></html>"
let not_modified request (modified, etag) = let not_modified request (modified, etag) =