From 9a2576c423c2ea372b8d721028b163b48427aaf2 Mon Sep 17 00:00:00 2001 From: Hannes Mehnert Date: Thu, 24 Oct 2024 14:45:31 +0200 Subject: [PATCH 1/2] extract the opam parsing function to a separate module (opam_file.ml / Opam_file) prepares for handling extra-source as well --- mirage/opam_file.ml | 114 ++++++++++++++++++++++++++++++++++++++++++++ mirage/unikernel.ml | 110 +----------------------------------------- 2 files changed, 116 insertions(+), 108 deletions(-) create mode 100644 mirage/opam_file.ml diff --git a/mirage/opam_file.ml b/mirage/opam_file.ml new file mode 100644 index 0000000..ce1665e --- /dev/null +++ b/mirage/opam_file.ml @@ -0,0 +1,114 @@ +let src = Logs.Src.create "opam-file.opam-mirror" ~doc:"Opam file decoding in opam-mirror" +module Log = (val Logs.src_log src : Logs.LOG) + +module HM = Archive_checksum.HM + +let hash_to_string = Archive_checksum.Hash.to_string + +let hex_of_string s = + match Ohex.decode s with + | d -> Ok d + | exception Invalid_argument err -> Error (`Msg err) + +let decode_digest filename str = + let hex h s = + match hex_of_string s with + | Ok d -> Some (h, d) + | Error `Msg msg -> + Log.warn (fun m -> m "%s invalid hex (%s) %s" filename msg s); None + in + match String.split_on_char '=' str with + | [ data ] -> hex `MD5 data + | [ "md5" ; data ] -> hex `MD5 data + | [ "sha256" ; data ] -> hex `SHA256 data + | [ "sha512" ; data ] -> hex `SHA512 data + | [ hash ; _ ] -> Log.warn (fun m -> m "%s unknown hash %s" filename hash); None + | _ -> Log.warn (fun m -> m "%s unexpected hash format %S" filename str); None + +let extract_url_checksum filename items = + let open OpamParserTypes.FullPos in + let url = + List.find_opt + (function { pelem = Variable ({ pelem = "src" ; _ }, _); _ } -> true | _ -> false) + items + and archive = + List.find_opt + (function { pelem = Variable ({ pelem = "archive" ; _ }, _); _ } -> true | _ -> false) + items + and checksum = + List.find_opt + (function { pelem = Variable ({ pelem = "checksum" ; _ }, _); _ } -> true | _ -> false) + items + in + let url = + match url, archive with + | Some { pelem = Variable (_, { pelem = String url ; _ }) ; _ }, None -> Some url + | None, Some { pelem = Variable (_, { pelem = String url ; _ }); _ } -> Some url + | _ -> + Log.warn (fun m -> m "%s neither src nor archive present" filename); None + in + let csum = + match checksum with + | Some { pelem = Variable (_, { pelem = List { pelem = csums ; _ } ; _ }); _ } -> + let csums = + List.fold_left (fun acc -> + function + | { pelem = String csum ; _ } -> + begin match decode_digest filename csum with + | None -> acc + | Some (h, v) -> + HM.update h (function + | None -> Some v + | Some v' when String.equal v v' -> None + | Some v' -> + Log.warn (fun m -> m "for %s, hash %s, multiple keys are present: %s %s" + (Option.value ~default:"NONE" url) (hash_to_string h) (Ohex.encode v) (Ohex.encode v')); + None) + acc + end + | _ -> acc) HM.empty csums + in + Some csums + | Some { pelem = Variable (_, { pelem = String csum ; _ }) ; _ } -> + begin match decode_digest filename csum with + | None -> None + | Some (h, v) -> Some (HM.singleton h v) + end + | _ -> + Log.warn (fun m -> m "couldn't decode checksum in %s" filename); + None + in + match url, csum with + | Some url, Some cs -> Some (url, cs) + | _ -> None + +let extract_urls filename str = + (* in an opam file, there may be: + url { src: checksum: [ STRING ] } <- list of hash + url { src: checksum: STRING } <- single hash + url { archive: checksum: STRING } <- MD5 + extra-source NAME { src: URL checksum: [ STRING ] } (OR checksum: STRING) <- multiple occurences possible + *) + let open OpamParserTypes.FullPos in + let opamfile = OpamParser.FullPos.string str filename in + let unavailable = + List.exists + (function + | { pelem = Variable ({ pelem = "available" ; _ }, + { pelem = (Bool false | List { pelem = [{ pelem = Bool false; _ }] ; _ }); _ }) + ; _ } -> true + | _ -> false) + opamfile.file_contents + in + if unavailable then + (Log.debug (fun m -> m "%s is marked unavailable, skipping" filename); + None) + else + match + List.find_opt (function + | { pelem = Section ({ section_kind = { pelem = "url" ; _ } ; _ }) ; _} -> true | _ -> false) + opamfile.file_contents + with + | Some { pelem = Section ({ section_items = { pelem = items ; _ }; _}) ; _ } -> + extract_url_checksum filename items + | _ -> Log.debug (fun m -> m "no url section for %s" filename); None diff --git a/mirage/unikernel.ml b/mirage/unikernel.ml index 27ef584..9141774 100644 --- a/mirage/unikernel.ml +++ b/mirage/unikernel.ml @@ -57,7 +57,7 @@ module K = struct let doc = "Initialize the disk with a partition table. THIS IS DESTRUCTIVE!" in let doc = Arg.info ~doc ["initialize-disk"] in Mirage_runtime.register_arg Arg.(value & flag doc) - + let ignore_local_git = let doc = "Ignore restoring locally saved git repository." in let doc = Arg.info ~doc ["ignore-local-git"] in @@ -90,11 +90,6 @@ module Make let hex_to_key h = Mirage_kv.Key.v (Ohex.encode h) - let hex_of_string s = - match Ohex.decode s with - | d -> Ok d - | exception Invalid_argument err -> Error (`Msg err) - let hm_to_s hm = HM.fold (fun h v acc -> hash_to_string h ^ "=" ^ Ohex.encode v ^ "\n" ^ acc) @@ -123,107 +118,6 @@ module Make in go store Mirage_kv.Key.empty [] - let decode_digest filename str = - let hex h s = - match hex_of_string s with - | Ok d -> Some (h, d) - | Error `Msg msg -> - Logs.warn (fun m -> m "%s invalid hex (%s) %s" filename msg s); None - in - match String.split_on_char '=' str with - | [ data ] -> hex `MD5 data - | [ "md5" ; data ] -> hex `MD5 data - | [ "sha256" ; data ] -> hex `SHA256 data - | [ "sha512" ; data ] -> hex `SHA512 data - | [ hash ; _ ] -> Logs.warn (fun m -> m "%s unknown hash %s" filename hash); None - | _ -> Logs.warn (fun m -> m "%s unexpected hash format %S" filename str); None - - let extract_urls filename str = - (* in an opam file, there may be: - url { src: checksum: [ STRING ] } <- list of hash - url { src: checksum: STRING } <- single hash - url { archive: checksum: STRING } <- MD5 - *) - let open OpamParserTypes.FullPos in - let opamfile = OpamParser.FullPos.string str filename in - let unavailable = - List.exists - (function - | { pelem = Variable ({ pelem = "available" ; _ }, - { pelem = (Bool false | List { pelem = [{ pelem = Bool false; _ }] ; _ }); _ }) - ; _ } -> true - | _ -> false) - opamfile.file_contents - in - if unavailable then - (Logs.debug (fun m -> m "%s is marked unavailable, skipping" filename); - None) - else - let url_section = - List.find_opt (function - | { pelem = Section ({ section_kind = { pelem = "url" ; _ } ; _ }) ; _} -> true | _ -> false) - opamfile.file_contents - in - match url_section with - | Some { pelem = Section ({ section_items = { pelem = items ; _ }; _}) ; _ } -> - begin - let url = - List.find_opt - (function { pelem = Variable ({ pelem = "src" ; _ }, _); _ } -> true | _ -> false) - items - and archive = - List.find_opt - (function { pelem = Variable ({ pelem = "archive" ; _ }, _); _ } -> true | _ -> false) - items - and checksum = - List.find_opt - (function { pelem = Variable ({ pelem = "checksum" ; _ }, _); _ } -> true | _ -> false) - items - in - let url = - match url, archive with - | Some { pelem = Variable (_, { pelem = String url ; _ }) ; _ }, None -> Some url - | None, Some { pelem = Variable (_, { pelem = String url ; _ }); _ } -> Some url - | _ -> - Logs.warn (fun m -> m "%s neither src nor archive present" filename); None - in - let csum = - match checksum with - | Some { pelem = Variable (_, { pelem = List { pelem = csums ; _ } ; _ }); _ } -> - let csums = - List.fold_left (fun acc -> - function - | { pelem = String csum ; _ } -> - begin match decode_digest filename csum with - | None -> acc - | Some (h, v) -> - HM.update h (function - | None -> Some v - | Some v' when String.equal v v' -> None - | Some v' -> - Logs.warn (fun m -> m "for %s, hash %s, multiple keys are present: %s %s" - (Option.value ~default:"NONE" url) (hash_to_string h) (Ohex.encode v) (Ohex.encode v')); - None) - acc - end - | _ -> acc) HM.empty csums - in - Some csums - | Some { pelem = Variable (_, { pelem = String csum ; _ }) ; _ } -> - begin match decode_digest filename csum with - | None -> None - | Some (h, v) -> Some (HM.singleton h v) - end - | _ -> - Logs.warn (fun m -> m "couldn't decode checksum in %s" filename); - None - in - match url, csum with - | Some url, Some cs -> Some (url, cs) - | _ -> None - end - | _ -> Logs.debug (fun m -> m "no url section for %s" filename); None - let find_urls store = find_contents store >>= fun paths -> let opam_paths = @@ -234,7 +128,7 @@ module Make | Ok data -> (* TODO report parser errors *) (try - let url_csums = extract_urls (Mirage_kv.Key.to_string path) data in + let url_csums = Opam_file.extract_urls (Mirage_kv.Key.to_string path) data in Option.fold ~none:acc ~some:(fun (url, csums) -> if HM.cardinal csums = 0 then (Logs.warn (fun m -> m "no checksums for %s, ignoring" url); acc) From 9bfde48f43c15f4b6ebf0db75dd5554a6ddff9fb Mon Sep 17 00:00:00 2001 From: Hannes Mehnert Date: Thu, 24 Oct 2024 15:03:35 +0200 Subject: [PATCH 2/2] also handle extra-source, fixes #8 --- mirage/opam_file.ml | 30 +++++++++++++++++++++--------- mirage/unikernel.ml | 4 ++-- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/mirage/opam_file.ml b/mirage/opam_file.ml index ce1665e..e08f252 100644 --- a/mirage/opam_file.ml +++ b/mirage/opam_file.ml @@ -82,6 +82,25 @@ let extract_url_checksum filename items = | Some url, Some cs -> Some (url, cs) | _ -> None +let extract_checksums_and_urls filename opam = + let open OpamParserTypes.FullPos in + List.fold_left (fun acc -> + function + | { pelem = Section ({ section_kind = { pelem = "url" ; _ } ; section_items = { pelem = items ; _ } ; _ }) ; _} -> + begin match extract_url_checksum filename items with + | None -> acc + | Some url -> url :: acc + end + | { pelem = Section ({ section_kind = { pelem = "extra-source" ; _ } ; section_name = Some { pelem ; _ } ; section_items = { pelem = items ; _ }; _ }) ; _} -> + begin + Log.debug (fun m -> m "extracting for extra-source %s in %s" filename pelem); + match extract_url_checksum filename items with + | None -> acc + | Some url -> url :: acc + end + | _ -> acc) + [] opam.file_contents + let extract_urls filename str = (* in an opam file, there may be: url { src: checksum: [ STRING ] } <- list of hash @@ -102,13 +121,6 @@ let extract_urls filename str = in if unavailable then (Log.debug (fun m -> m "%s is marked unavailable, skipping" filename); - None) + []) else - match - List.find_opt (function - | { pelem = Section ({ section_kind = { pelem = "url" ; _ } ; _ }) ; _} -> true | _ -> false) - opamfile.file_contents - with - | Some { pelem = Section ({ section_items = { pelem = items ; _ }; _}) ; _ } -> - extract_url_checksum filename items - | _ -> Log.debug (fun m -> m "no url section for %s" filename); None + extract_checksums_and_urls filename opamfile diff --git a/mirage/unikernel.ml b/mirage/unikernel.ml index 9141774..51da59a 100644 --- a/mirage/unikernel.ml +++ b/mirage/unikernel.ml @@ -129,7 +129,7 @@ module Make (* TODO report parser errors *) (try let url_csums = Opam_file.extract_urls (Mirage_kv.Key.to_string path) data in - Option.fold ~none:acc ~some:(fun (url, csums) -> + List.fold_left (fun acc (url, csums) -> if HM.cardinal csums = 0 then (Logs.warn (fun m -> m "no checksums for %s, ignoring" url); acc) else @@ -146,7 +146,7 @@ module Make Logs.warn (fun m -> m "mismatching hashes for %s: %s vs %s" url (hm_to_s csums') (hm_to_s csums)); None - end) acc) url_csums + end) acc) acc url_csums with _ -> Logs.warn (fun m -> m "some error in %a, ignoring" Mirage_kv.Key.pp path); acc)