extra-source #14

Merged
hannes merged 2 commits from extra-source into main 2024-10-26 14:42:12 +00:00
2 changed files with 130 additions and 110 deletions

126
mirage/opam_file.ml Normal file
View file

@ -0,0 +1,126 @@
let src = Logs.Src.create "opam-file.opam-mirror" ~doc:"Opam file decoding in opam-mirror"
module Log = (val Logs.src_log src : Logs.LOG)
module HM = Archive_checksum.HM
let hash_to_string = Archive_checksum.Hash.to_string
let hex_of_string s =
match Ohex.decode s with
| d -> Ok d
| exception Invalid_argument err -> Error (`Msg err)
let decode_digest filename str =
let hex h s =
match hex_of_string s with
| Ok d -> Some (h, d)
| Error `Msg msg ->
Log.warn (fun m -> m "%s invalid hex (%s) %s" filename msg s); None
in
match String.split_on_char '=' str with
| [ data ] -> hex `MD5 data
| [ "md5" ; data ] -> hex `MD5 data
| [ "sha256" ; data ] -> hex `SHA256 data
| [ "sha512" ; data ] -> hex `SHA512 data
| [ hash ; _ ] -> Log.warn (fun m -> m "%s unknown hash %s" filename hash); None
| _ -> Log.warn (fun m -> m "%s unexpected hash format %S" filename str); None
let extract_url_checksum filename items =
let open OpamParserTypes.FullPos in
let url =
List.find_opt
(function { pelem = Variable ({ pelem = "src" ; _ }, _); _ } -> true | _ -> false)
items
and archive =
List.find_opt
(function { pelem = Variable ({ pelem = "archive" ; _ }, _); _ } -> true | _ -> false)
items
and checksum =
List.find_opt
(function { pelem = Variable ({ pelem = "checksum" ; _ }, _); _ } -> true | _ -> false)
items
in
let url =
match url, archive with
| Some { pelem = Variable (_, { pelem = String url ; _ }) ; _ }, None -> Some url
| None, Some { pelem = Variable (_, { pelem = String url ; _ }); _ } -> Some url
| _ ->
Log.warn (fun m -> m "%s neither src nor archive present" filename); None
in
let csum =
match checksum with
| Some { pelem = Variable (_, { pelem = List { pelem = csums ; _ } ; _ }); _ } ->
let csums =
List.fold_left (fun acc ->
function
| { pelem = String csum ; _ } ->
begin match decode_digest filename csum with
| None -> acc
| Some (h, v) ->
HM.update h (function
| None -> Some v
| Some v' when String.equal v v' -> None
| Some v' ->
Log.warn (fun m -> m "for %s, hash %s, multiple keys are present: %s %s"
(Option.value ~default:"NONE" url) (hash_to_string h) (Ohex.encode v) (Ohex.encode v'));
None)
acc
end
| _ -> acc) HM.empty csums
in
Some csums
| Some { pelem = Variable (_, { pelem = String csum ; _ }) ; _ } ->
begin match decode_digest filename csum with
| None -> None
| Some (h, v) -> Some (HM.singleton h v)
end
| _ ->
Log.warn (fun m -> m "couldn't decode checksum in %s" filename);
None
in
match url, csum with
| Some url, Some cs -> Some (url, cs)
| _ -> None
let extract_checksums_and_urls filename opam =
let open OpamParserTypes.FullPos in
List.fold_left (fun acc ->
function
| { pelem = Section ({ section_kind = { pelem = "url" ; _ } ; section_items = { pelem = items ; _ } ; _ }) ; _} ->
begin match extract_url_checksum filename items with
| None -> acc
| Some url -> url :: acc
end
| { pelem = Section ({ section_kind = { pelem = "extra-source" ; _ } ; section_name = Some { pelem ; _ } ; section_items = { pelem = items ; _ }; _ }) ; _} ->
begin
Log.debug (fun m -> m "extracting for extra-source %s in %s" filename pelem);
match extract_url_checksum filename items with
| None -> acc
| Some url -> url :: acc
end
| _ -> acc)
[] opam.file_contents
let extract_urls filename str =
(* in an opam file, there may be:
url { src: <string> checksum: [ STRING ] } <- list of hash
url { src: <string> checksum: STRING } <- single hash
url { archive: <string> checksum: STRING } <- MD5
extra-source NAME { src: URL checksum: [ STRING ] } (OR checksum: STRING) <- multiple occurences possible
*)
let open OpamParserTypes.FullPos in
let opamfile = OpamParser.FullPos.string str filename in
let unavailable =
List.exists
(function
| { pelem = Variable ({ pelem = "available" ; _ },
{ pelem = (Bool false | List { pelem = [{ pelem = Bool false; _ }] ; _ }); _ })
; _ } -> true
| _ -> false)
opamfile.file_contents
in
if unavailable then
(Log.debug (fun m -> m "%s is marked unavailable, skipping" filename);
[])
else
extract_checksums_and_urls filename opamfile

View file

@ -90,11 +90,6 @@ module Make
let hex_to_key h = Mirage_kv.Key.v (Ohex.encode h) let hex_to_key h = Mirage_kv.Key.v (Ohex.encode h)
let hex_of_string s =
match Ohex.decode s with
| d -> Ok d
| exception Invalid_argument err -> Error (`Msg err)
let hm_to_s hm = let hm_to_s hm =
HM.fold (fun h v acc -> HM.fold (fun h v acc ->
hash_to_string h ^ "=" ^ Ohex.encode v ^ "\n" ^ acc) hash_to_string h ^ "=" ^ Ohex.encode v ^ "\n" ^ acc)
@ -123,107 +118,6 @@ module Make
in in
go store Mirage_kv.Key.empty [] go store Mirage_kv.Key.empty []
let decode_digest filename str =
let hex h s =
match hex_of_string s with
| Ok d -> Some (h, d)
| Error `Msg msg ->
Logs.warn (fun m -> m "%s invalid hex (%s) %s" filename msg s); None
in
match String.split_on_char '=' str with
| [ data ] -> hex `MD5 data
| [ "md5" ; data ] -> hex `MD5 data
| [ "sha256" ; data ] -> hex `SHA256 data
| [ "sha512" ; data ] -> hex `SHA512 data
| [ hash ; _ ] -> Logs.warn (fun m -> m "%s unknown hash %s" filename hash); None
| _ -> Logs.warn (fun m -> m "%s unexpected hash format %S" filename str); None
let extract_urls filename str =
(* in an opam file, there may be:
url { src: <string> checksum: [ STRING ] } <- list of hash
url { src: <string> checksum: STRING } <- single hash
url { archive: <string> checksum: STRING } <- MD5
*)
let open OpamParserTypes.FullPos in
let opamfile = OpamParser.FullPos.string str filename in
let unavailable =
List.exists
(function
| { pelem = Variable ({ pelem = "available" ; _ },
{ pelem = (Bool false | List { pelem = [{ pelem = Bool false; _ }] ; _ }); _ })
; _ } -> true
| _ -> false)
opamfile.file_contents
in
if unavailable then
(Logs.debug (fun m -> m "%s is marked unavailable, skipping" filename);
None)
else
let url_section =
List.find_opt (function
| { pelem = Section ({ section_kind = { pelem = "url" ; _ } ; _ }) ; _} -> true | _ -> false)
opamfile.file_contents
in
match url_section with
| Some { pelem = Section ({ section_items = { pelem = items ; _ }; _}) ; _ } ->
begin
let url =
List.find_opt
(function { pelem = Variable ({ pelem = "src" ; _ }, _); _ } -> true | _ -> false)
items
and archive =
List.find_opt
(function { pelem = Variable ({ pelem = "archive" ; _ }, _); _ } -> true | _ -> false)
items
and checksum =
List.find_opt
(function { pelem = Variable ({ pelem = "checksum" ; _ }, _); _ } -> true | _ -> false)
items
in
let url =
match url, archive with
| Some { pelem = Variable (_, { pelem = String url ; _ }) ; _ }, None -> Some url
| None, Some { pelem = Variable (_, { pelem = String url ; _ }); _ } -> Some url
| _ ->
Logs.warn (fun m -> m "%s neither src nor archive present" filename); None
in
let csum =
match checksum with
| Some { pelem = Variable (_, { pelem = List { pelem = csums ; _ } ; _ }); _ } ->
let csums =
List.fold_left (fun acc ->
function
| { pelem = String csum ; _ } ->
begin match decode_digest filename csum with
| None -> acc
| Some (h, v) ->
HM.update h (function
| None -> Some v
| Some v' when String.equal v v' -> None
| Some v' ->
Logs.warn (fun m -> m "for %s, hash %s, multiple keys are present: %s %s"
(Option.value ~default:"NONE" url) (hash_to_string h) (Ohex.encode v) (Ohex.encode v'));
None)
acc
end
| _ -> acc) HM.empty csums
in
Some csums
| Some { pelem = Variable (_, { pelem = String csum ; _ }) ; _ } ->
begin match decode_digest filename csum with
| None -> None
| Some (h, v) -> Some (HM.singleton h v)
end
| _ ->
Logs.warn (fun m -> m "couldn't decode checksum in %s" filename);
None
in
match url, csum with
| Some url, Some cs -> Some (url, cs)
| _ -> None
end
| _ -> Logs.debug (fun m -> m "no url section for %s" filename); None
let find_urls store = let find_urls store =
find_contents store >>= fun paths -> find_contents store >>= fun paths ->
let opam_paths = let opam_paths =
@ -234,8 +128,8 @@ module Make
| Ok data -> | Ok data ->
(* TODO report parser errors *) (* TODO report parser errors *)
(try (try
let url_csums = extract_urls (Mirage_kv.Key.to_string path) data in let url_csums = Opam_file.extract_urls (Mirage_kv.Key.to_string path) data in
Option.fold ~none:acc ~some:(fun (url, csums) -> List.fold_left (fun acc (url, csums) ->
if HM.cardinal csums = 0 then if HM.cardinal csums = 0 then
(Logs.warn (fun m -> m "no checksums for %s, ignoring" url); acc) (Logs.warn (fun m -> m "no checksums for %s, ignoring" url); acc)
else else
@ -252,7 +146,7 @@ module Make
Logs.warn (fun m -> m "mismatching hashes for %s: %s vs %s" Logs.warn (fun m -> m "mismatching hashes for %s: %s vs %s"
url (hm_to_s csums') (hm_to_s csums)); url (hm_to_s csums') (hm_to_s csums));
None None
end) acc) url_csums end) acc) acc url_csums
with _ -> with _ ->
Logs.warn (fun m -> m "some error in %a, ignoring" Mirage_kv.Key.pp path); Logs.warn (fun m -> m "some error in %a, ignoring" Mirage_kv.Key.pp path);
acc) acc)