git clone and extract urls & checksums

This commit is contained in:
Hannes Mehnert 2022-08-25 22:47:46 +02:00
parent 788f58d3cf
commit 6fe03ce867
2 changed files with 257 additions and 22 deletions

View file

@ -7,19 +7,45 @@ let paf_conf () =
let packages = [ package "paf" ~sublibs:[ "mirage" ] ] in
impl ~packages "Paf_mirage.Make" (time @-> tcpv4v6 @-> paf)
let uri =
let doc = Key.Arg.info ~doc:"URI" [ "u"; "uri" ] in
Key.(create "uri" Arg.(required string doc))
let remote =
let doc = Key.Arg.info
~doc:"Remote repository url, use suffix #foo to specify a branch 'foo': \
https://github.com/ocaml/opam-repository.git"
["remote"]
in
Key.(create "remote" Arg.(opt string "https://github.com/ocaml/opam-repository.git#master" doc))
let tls_authenticator =
(* this will not look the same in the help printout *)
let doc = "TLS host authenticator. See git_http in lib/mirage/mirage.mli for a description of the format."
in
let doc = Key.Arg.info ~doc ["tls-authenticator"] in
Key.(create "tls-authenticator" Arg.(opt (some string) None doc))
let mirror =
foreign "Unikernel.Make"
~keys:[ Key.v uri ]
~packages:[ package "paf" ~min:"0.0.9" ; package "paf-cohttp" ~min:"0.0.7" ]
(time @-> pclock @-> stackv4v6 @-> dns_client @-> paf @-> job)
~keys:[ Key.v remote ; Key.v tls_authenticator ]
~packages:[
package "paf" ~min:"0.0.9" ;
package "paf-cohttp" ~min:"0.0.7" ;
package ~min:"3.0.0" "irmin-mirage-git" ;
package ~min:"3.7.0" "git-paf" ;
package "opam-file-format" ;
]
(time @-> pclock @-> stackv4v6 @-> dns_client @-> paf @-> git_client @-> job)
let paf time stackv4v6 = paf_conf () $ time $ tcpv4v6_of_stackv4v6 stackv4v6
let stackv4v6 = generic_stackv4v6 default_network
let stack = generic_stackv4v6 default_network
let dns = generic_dns_client stack
let tcp = tcpv4v6_of_stackv4v6 stack
let git_client =
let git = git_happy_eyeballs stack dns (generic_happy_eyeballs stack dns) in
merge_git_clients (git_tcp tcp git)
(git_http ~authenticator:tls_authenticator tcp git)
let () = register "mirror"
[ mirror $ default_time $ default_posix_clock $ stackv4v6 $ generic_dns_client stackv4v6 $ paf default_time stackv4v6 ]
[ mirror $ default_time $ default_posix_clock $ stack $ dns $ paf default_time stack $ git_client ]

View file

@ -7,12 +7,19 @@ end
open Lwt.Infix
let argument_error = 64
module Make
(Time : Mirage_time.S)
(Pclock : Mirage_clock.PCLOCK)
(Stack : Tcpip.Stack.V4V6)
(Dns : DNS) (* XXX(dinosaure): ask @hannesm to provide a signature. *)
(Paf : Paf_mirage.S with type stack = Stack.TCP.t and type ipaddr = Ipaddr.t) = struct
(Paf : Paf_mirage.S with type stack = Stack.TCP.t and type ipaddr = Ipaddr.t)
(_ : sig end) = struct
module Store = Irmin_mirage_git.Mem.KV.Make(Irmin.Contents.String)
module Sync = Irmin.Sync.Make(Store)
module Client = Paf_cohttp
module Nss = Ca_certs_nss.Make(Pclock)
@ -60,9 +67,211 @@ module Make
| _ -> Lwt.return_none in
Mimic.(fold Client.ipaddr Fun.[ req dns; req Client.domain_name ] ~k ctx)
let start _time _pclock stack dns _paf_cohttp =
let uri = Uri.of_string (Key_gen.uri ()) in
let ctx =
module SM = Map.Make(String)
module HM = Map.Make(struct
type t = Mirage_crypto.Hash.hash
let compare = compare (* TODO remove polymorphic compare *)
end)
let hash_to_string = function
| `MD5 -> "md5"
| `SHA1 -> "sha1"
| `SHA224 -> "sha224"
| `SHA256 -> "sha256"
| `SHA384 -> "sha384"
| `SHA512 -> "sha512"
let hex_to_string h =
let `Hex h = Hex.of_string h in
h
let hm_to_s hm =
HM.fold (fun h v acc ->
hash_to_string h ^ "=" ^ hex_to_string v ^ "\n" ^ acc)
hm ""
module Git = struct
let decompose_git_url () =
match String.split_on_char '#' (Key_gen.remote ()) with
| [ url ] -> url, None
| [ url ; branch ] -> url, Some branch
| _ ->
Logs.err (fun m -> m "expected at most a single # in remote");
exit argument_error
let connect ctx =
let uri, branch = decompose_git_url () in
let config = Irmin_mem.config () in
Store.Repo.v config >>= fun r ->
(match branch with
| None -> Store.main r
| Some branch -> Store.of_branch r branch) >|= fun repo ->
Logs.info (fun m -> m "connected to %s (branch %s)"
uri (Option.value ~default:"main" branch));
repo, Store.remote ~ctx uri
let pull store upstream =
Logs.info (fun m -> m "pulling from remote!");
Sync.pull ~depth:1 store upstream `Set >|= fun r ->
match r with
| Ok (`Head _ as s) -> Ok (Fmt.str "pulled %a" Sync.pp_status s)
| Ok `Empty -> Error (`Msg "pulled empty repository")
| Error (`Msg e) -> Error (`Msg ("pull error " ^ e))
| Error (`Conflict msg) -> Error (`Msg ("pull conflict " ^ msg))
let find_contents store =
let rec go store path acc =
Store.list store path >>= fun steps ->
Lwt_list.fold_left_s (fun acc (step, _) ->
let full_path = path @ [ step ] in
let str = String.concat "/" full_path in
Store.kind store full_path >>= function
| None ->
Logs.warn (fun m -> m "no kind for %s" str);
Lwt.return acc
| Some `Contents -> Lwt.return (full_path :: acc)
| Some `Node -> go store full_path acc) acc steps
in
go store [] [] >|= fun contents ->
Logs.info (fun m -> m "%d contents" (List.length contents));
contents
let decode_digest filename str =
let hex h s =
match Hex.to_string (`Hex s) with
| d -> Some (h, d)
| exception Invalid_argument _ -> Logs.warn (fun m -> m "%s invalid hex %s" filename s); None
in
match String.split_on_char '=' str with
| [ data ] -> hex `MD5 data
| [ "md5" ; data ] -> hex `MD5 data
| [ "sha256" ; data ] -> hex `SHA256 data
| [ "sha512" ; data ] -> hex `SHA512 data
| [ hash ; _ ] -> Logs.warn (fun m -> m "%s unknown hash %s" filename hash); None
| _ -> Logs.warn (fun m -> m "%s unexpected hash format %S" filename str); None
let extract_urls filename str =
(* in an opam file, there may be:
url { src: <string> checksum: [ STRING ] } <- list of hash
url { src: <string> checksum: STRING } <- single hash
url { archive: <string> checksum: STRING } <- MD5
*)
let open OpamParserTypes.FullPos in
let opamfile = OpamParser.FullPos.string str filename in
let url_section =
List.find_opt (function
| { pelem = Section ({ section_kind = { pelem = "url" ; _ } ; _ }) ; _} -> true | _ -> false)
opamfile.file_contents
in
match url_section with
| Some { pelem = Section ({ section_items = { pelem = items ; _ }; _}) ; _ } ->
begin
let url =
List.find_opt
(function { pelem = Variable ({ pelem = "src" ; _ }, _); _ } -> true | _ -> false)
items
and archive =
List.find_opt
(function { pelem = Variable ({ pelem = "archive" ; _ }, _); _ } -> true | _ -> false)
items
and checksum =
List.find_opt
(function { pelem = Variable ({ pelem = "checksum" ; _ }, _); _ } -> true | _ -> false)
items
in
let url =
match url, archive with
| Some { pelem = Variable (_, { pelem = String url ; _ }) }, None -> Some url
| None, Some { pelem = Variable (_, { pelem = String url ; _ }) } -> Some url
| _ ->
Logs.warn (fun m -> m "%s neither src nor archive present" filename); None
in
let csum =
match checksum with
| Some { pelem = Variable (_, { pelem = List { pelem = csums ; _ } ; _ }); _ } ->
let csums =
List.fold_left (fun acc ->
function
| { pelem = String csum ; _ } ->
begin match decode_digest filename csum with
| None -> acc
| Some (h, v) ->
HM.update h (function
| None -> Some v
| Some v' when String.equal v v' -> None
| Some v' ->
Logs.warn (fun m -> m "for %s, hash %s, multiple keys are present: %s %s"
(Option.value ~default:"NONE" url) (hash_to_string h) (hex_to_string v) (hex_to_string v'));
None)
acc
end
| _ -> acc) HM.empty csums
in
Some csums
| Some { pelem = Variable (_, { pelem = String csum ; _ }) ; _ } ->
begin match decode_digest filename csum with
| None -> None
| Some (h, v) -> Some (HM.singleton h v)
end
| _ ->
Logs.warn (fun m -> m "couldn't decode checksum in %s" filename);
None
in
match url, csum with
| Some url, Some cs -> Some (url, cs)
| _ -> None
end
| _ -> Logs.debug (fun m -> m "no url section for %s" filename); None
let find_urls store =
find_contents store >>= fun paths ->
let opam_paths =
List.filter (fun p -> match List.rev p with
| "opam" :: _ -> true | _ -> false)
paths
in
Lwt_list.fold_left_s (fun acc path ->
Store.find store path >|= function
| Some data ->
(* TODO report parser errors *)
(try
let url_csums = extract_urls (String.concat "/" path) data in
Option.fold ~none:acc ~some:(fun (url, csums) ->
if HM.cardinal csums = 0 then
(Logs.warn (fun m -> m "no checksums for %s, ignoring" url); acc)
else
SM.update url (function
| None -> Some csums
| Some csums' ->
if HM.for_all (fun h v ->
match HM.find_opt h csums with
| None -> true | Some v' -> String.equal v v')
csums'
then
Some (HM.union (fun _h v _v' -> Some v) csums csums')
else begin
Logs.warn (fun m -> m "mismatching hashes for %s: %s vs %s"
url (hm_to_s csums') (hm_to_s csums));
None
end) acc) url_csums
with _ ->
Logs.warn (fun m -> m "some error in %s, ignoring" (String.concat "/" path));
acc)
| None -> acc)
SM.empty opam_paths >|= fun urls ->
Logs.info (fun m -> m "map contains %d urls" (SM.cardinal urls))
(* SM.iter (fun url csums -> Logs.info (fun m -> m "%s: %s" url (hm_to_s csums))) urls *)
end
let start _time _pclock stack dns _paf_cohttp git_ctx =
Git.connect git_ctx >>= fun (store, upstream) ->
Git.pull store upstream >>= function
| Error `Msg msg -> Lwt.fail_with msg
| Ok msg ->
Logs.info (fun m -> m "store: %s" msg);
Git.find_urls store >|= fun () ->
let _ctx =
Mimic.empty
|> with_sleep
|> with_tcp (* stack -> ipaddr -> port => (stack * ipaddr * port) *)
@ -70,7 +279,7 @@ module Make
|> with_resolv (* domain_name => ipaddr *)
|> with_stack stack (* stack *)
|> with_dns dns (* dns *) in
Client.get ~ctx uri >>= fun (_resp, body) ->
Cohttp_lwt.Body.to_string body >|= fun str ->
Logs.info (fun m -> m "%S\n%!" str)
(* Client.get ~ctx uri >>= fun (_resp, body) ->
Cohttp_lwt.Body.to_string body >|= fun str -> *)
Logs.info (fun m -> m "done")
end