revise startup, address urls pointing to same sha256 and support mirrors (upstream and in opam file) #24

Merged
hannes merged 7 commits from startup into main 2024-11-20 10:38:22 +00:00
Showing only changes of commit 4481923ade - Show all commits

View file

@ -9,11 +9,11 @@ module K = struct
let doc = Arg.info ~doc:"Only check the cache" ["check"] in let doc = Arg.info ~doc:"Only check the cache" ["check"] in
Mirage_runtime.register_arg Arg.(value & flag doc) Mirage_runtime.register_arg Arg.(value & flag doc)
let verify_sha256 = let skip_verify_sha256 =
let doc = Arg.info let doc = Arg.info
~doc:"Verify the SHA256 checksums of the cache contents, and \ ~doc:"Skip verification of the SHA256 checksums of the cache contents, \
re-build the other checksum caches." and do not re-build the other checksum caches."
["verify-sha256"] ["skip-verify-sha256"]
in in
Mirage_runtime.register_arg Arg.(value & flag doc) Mirage_runtime.register_arg Arg.(value & flag doc)
@ -241,13 +241,17 @@ module Make
type t = { type t = {
mutable md5s : string SM.t ; mutable md5s : string SM.t ;
mutable sha512s : string SM.t ; mutable sha512s : string SM.t ;
mutable checked : bool ;
dev : KV.t ; dev : KV.t ;
dev_md5s : Cache.t ; dev_md5s : Cache.t ;
dev_sha512s : Cache.t ; dev_sha512s : Cache.t ;
dev_swap : Swap.t ; dev_swap : Swap.t ;
} }
let empty dev dev_md5s dev_sha512s dev_swap = { md5s = SM.empty ; sha512s = SM.empty ; dev; dev_md5s; dev_sha512s ; dev_swap } let empty dev dev_md5s dev_sha512s dev_swap =
{ md5s = SM.empty ; sha512s = SM.empty ; checked = false ; dev; dev_md5s; dev_sha512s ; dev_swap }
let ready t = t.checked
let marshal_sm (sm : string SM.t) = let marshal_sm (sm : string SM.t) =
let version = char_of_int 1 in let version = char_of_int 1 in
@ -272,18 +276,20 @@ module Make
Lwt.return_unit Lwt.return_unit
let find_key t h key = let find_key t h key =
assert (List.length (Mirage_kv.Key.segments key) = 1); if List.length (Mirage_kv.Key.segments key) <> 1 then begin
match Logs.warn (fun m -> m "find_key with multiple segments: %a" Mirage_kv.Key.pp key);
match h with Error `Not_found
| `MD5 -> end else
Option.map Mirage_kv.Key.v (SM.find_opt (Mirage_kv.Key.basename key) t.md5s) match
| `SHA512 -> match h with
Option.map Mirage_kv.Key.v (SM.find_opt (Mirage_kv.Key.basename key) t.sha512s) | `MD5 ->
| `SHA256 -> Some key Option.map Mirage_kv.Key.v (SM.find_opt (Mirage_kv.Key.basename key) t.md5s)
| _ -> None | `SHA512 ->
with Option.map Mirage_kv.Key.v (SM.find_opt (Mirage_kv.Key.basename key) t.sha512s)
| None -> Error `Not_found | `SHA256 -> Some key
| Some x -> Ok x with
| None -> Error `Not_found
| Some x -> Ok x
let read_chunked t h v f a = let read_chunked t h v f a =
match find_key t h v with match find_key t h v with
@ -392,22 +398,23 @@ module Make
end end
(* on disk, we use a flat file system where the filename is the sha256 of the data *) (* on disk, we use a flat file system where the filename is the sha256 of the data *)
let init ~verify_sha256 dev dev_md5s dev_sha512s dev_swap = let check ~skip_verify_sha256 t =
KV.list dev Mirage_kv.Key.empty >>= function KV.list t.dev Mirage_kv.Key.empty >>= function
| Error e -> invalid_arg (Fmt.str "error %a listing kv" KV.pp_error e) | Error e ->
Logs.err (fun m -> m "error %a listing kv" KV.pp_error e);
Lwt.return_unit
| Ok entries -> | Ok entries ->
let t = empty dev dev_md5s dev_sha512s dev_swap in
Cache.read t.dev_md5s >>= fun r -> Cache.read t.dev_md5s >>= fun r ->
(match r with (match r with
| Ok Some s -> | Ok Some s ->
if not verify_sha256 then if skip_verify_sha256 then
Result.iter (fun md5s -> t.md5s <- md5s) (unmarshal_sm s) Result.iter (fun md5s -> t.md5s <- md5s) (unmarshal_sm s)
| Ok None -> () | Ok None -> ()
| Error e -> Logs.warn (fun m -> m "Error reading md5s cache: %a" Cache.pp_error e)); | Error e -> Logs.warn (fun m -> m "Error reading md5s cache: %a" Cache.pp_error e));
Cache.read t.dev_sha512s >>= fun r -> Cache.read t.dev_sha512s >>= fun r ->
(match r with (match r with
| Ok Some s -> | Ok Some s ->
if not verify_sha256 then if skip_verify_sha256 then
Result.iter (fun sha512s -> t.sha512s <- sha512s) (unmarshal_sm s) Result.iter (fun sha512s -> t.sha512s <- sha512s) (unmarshal_sm s)
| Ok None -> () | Ok None -> ()
| Error e -> Logs.warn (fun m -> m "Error reading sha512s cache: %a" Cache.pp_error e)); | Error e -> Logs.warn (fun m -> m "Error reading sha512s cache: %a" Cache.pp_error e));
@ -438,7 +445,7 @@ module Make
None None
in in
let sha256_final = let sha256_final =
let need_to_compute = md5_final <> None || sha512_final <> None || verify_sha256 in let need_to_compute = md5_final <> None || sha512_final <> None || not skip_verify_sha256 in
if need_to_compute then if need_to_compute then
let f s = let f s =
let digest = SHA256.(to_raw_string (get s)) in let digest = SHA256.(to_raw_string (get s)) in
@ -485,7 +492,7 @@ module Make
end) end)
entries >>= fun () -> entries >>= fun () ->
update_caches t >|= fun () -> update_caches t >|= fun () ->
t t.checked <- true
let exists t h v = let exists t h v =
match find_key t h v with match find_key t h v with
@ -841,38 +848,43 @@ stamp: %S
| Ok h -> | Ok h ->
let hash = Mirage_kv.Key.v hash in let hash = Mirage_kv.Key.v hash in
Lwt.async (fun () -> Lwt.async (fun () ->
(Disk.last_modified store h hash >|= function if Disk.ready store then
| Error _ -> t.modified (Disk.last_modified store h hash >|= function
| Ok v -> ptime_to_http_date v) >>= fun last_modified -> | Error _ -> t.modified
if not_modified request (last_modified, Mirage_kv.Key.basename hash) then | Ok v -> ptime_to_http_date v) >>= fun last_modified ->
let resp = Httpaf.Response.create `Not_modified in if not_modified request (last_modified, Mirage_kv.Key.basename hash) then
respond_with_empty reqd resp; let resp = Httpaf.Response.create `Not_modified in
Lwt.return_unit respond_with_empty reqd resp;
else
Disk.size store h hash >>= function
| Error _ ->
not_found reqd request.Httpaf.Request.target;
Lwt.return_unit Lwt.return_unit
| Ok size -> else
let size = Optint.Int63.to_string size in Disk.size store h hash >>= function
let mime_type = "application/octet-stream" in | Error _ ->
let headers = [ not_found reqd request.Httpaf.Request.target;
"content-type", mime_type ; Lwt.return_unit
"etag", Mirage_kv.Key.basename hash ; | Ok size ->
"last-modified", last_modified ; let size = Optint.Int63.to_string size in
"content-length", size ; let mime_type = "application/octet-stream" in
] let headers = [
in "content-type", mime_type ;
let headers = Httpaf.Headers.of_list headers in "etag", Mirage_kv.Key.basename hash ;
let resp = Httpaf.Response.create ~headers `OK in "last-modified", last_modified ;
let body = Httpaf.Reqd.respond_with_streaming reqd resp in "content-length", size ;
Disk.read_chunked store h hash (fun () chunk -> ]
let wait, wakeup = Lwt.task () in in
(* FIXME: catch exception when body is closed *) let headers = Httpaf.Headers.of_list headers in
Httpaf.Body.write_string body chunk; let resp = Httpaf.Response.create ~headers `OK in
Httpaf.Body.flush body (Lwt.wakeup wakeup); let body = Httpaf.Reqd.respond_with_streaming reqd resp in
wait) () >|= fun _ -> Disk.read_chunked store h hash (fun () chunk ->
Httpaf.Body.close_writer body) let wait, wakeup = Lwt.task () in
(* FIXME: catch exception when body is closed *)
Httpaf.Body.write_string body chunk;
Httpaf.Body.flush body (Lwt.wakeup wakeup);
wait) () >|= fun _ ->
Httpaf.Body.close_writer body
else begin
not_found reqd request.Httpaf.Request.target;
Lwt.return_unit
end)
end end
| _ -> | _ ->
Logs.warn (fun m -> m "unknown request %s" request.Httpaf.Request.target); Logs.warn (fun m -> m "unknown request %s" request.Httpaf.Request.target);
@ -881,7 +893,6 @@ stamp: %S
end end
let download_archives parallel_downloads disk http_client urls = let download_archives parallel_downloads disk http_client urls =
(* FIXME: handle resuming partial downloads *)
reset_failed_downloads (); reset_failed_downloads ();
remaining_downloads := SM.cardinal urls; remaining_downloads := SM.cardinal urls;
archives := SM.cardinal urls; archives := SM.cardinal urls;
@ -957,10 +968,10 @@ stamp: %S
Cache.connect sha512s >>= fun sha512s -> Cache.connect sha512s >>= fun sha512s ->
Swap.connect swap >>= fun swap -> Swap.connect swap >>= fun swap ->
Logs.info (fun m -> m "Available bytes in tar storage: %Ld" (KV.free kv)); Logs.info (fun m -> m "Available bytes in tar storage: %Ld" (KV.free kv));
Disk.init ~verify_sha256:(K.verify_sha256 ()) kv md5s sha512s swap >>= fun disk -> let disk = Disk.empty kv md5s sha512s swap in
let remote = K.remote () in let remote = K.remote () in
if K.check () then if K.check () then
Lwt.return_unit Disk.check ~skip_verify_sha256:(K.skip_verify_sha256 ()) disk
else else
begin begin
Logs.info (fun m -> m "Initializing git state. This may take a while..."); Logs.info (fun m -> m "Initializing git state. This may take a while...");
@ -968,23 +979,27 @@ stamp: %S
Lwt.return (Error ()) Lwt.return (Error ())
else else
restore_git ~remote git_dump git_ctx) >>= function restore_git ~remote git_dump git_ctx) >>= function
| Ok git_kv -> Lwt.return git_kv | Ok git_kv -> Lwt.return (false, git_kv)
| Error () -> | Error () ->
Git_kv.connect git_ctx remote >>= fun git_kv -> Git_kv.connect git_ctx remote >>= fun git_kv ->
dump_git git_dump git_kv >|= fun () -> Lwt.return (true, git_kv)
git_kv end >>= fun (need_dump, git_kv) ->
end >>= fun git_kv ->
Logs.info (fun m -> m "Done initializing git state!"); Logs.info (fun m -> m "Done initializing git state!");
Serve.commit_id git_kv >>= fun commit_id -> Serve.commit_id git_kv >>= fun commit_id ->
Logs.info (fun m -> m "git: %s" commit_id); Logs.info (fun m -> m "git: %s" commit_id);
Serve.create remote git_kv >>= fun (serve, urls) -> Serve.create remote git_kv >>= fun (serve, urls) ->
Paf.init ~port:(K.port ()) (Stack.tcp stack) >>= fun t -> Paf.init ~port:(K.port ()) (Stack.tcp stack) >>= fun t ->
let update () = let update () =
Serve.update_git ~remote serve git_kv >>= function if Disk.ready disk then
| None | Some ([], _) -> Lwt.return_unit Serve.update_git ~remote serve git_kv >>= function
| Some (_changes, urls) -> | None | Some ([], _) -> Lwt.return_unit
dump_git git_dump git_kv >>= fun () -> | Some (_changes, urls) ->
download_archives (K.parallel_downloads ()) disk http_ctx urls dump_git git_dump git_kv >>= fun () ->
download_archives (K.parallel_downloads ()) disk http_ctx urls
else begin
Logs.warn (fun m -> m "disk is not ready yet, thus not updating");
Lwt.return_unit
end
in in
let service = let service =
Paf.http_service Paf.http_service
@ -993,6 +1008,14 @@ stamp: %S
in in
let `Initialized th = Paf.serve service t in let `Initialized th = Paf.serve service t in
Logs.info (fun f -> f "listening on %d/HTTP" (K.port ())); Logs.info (fun f -> f "listening on %d/HTTP" (K.port ()));
Lwt.join [
(if need_dump then begin
Logs.info (fun m -> m "dumping git state %s" commit_id);
dump_git git_dump git_kv
end else
Lwt.return_unit) ;
(Disk.check ~skip_verify_sha256:(K.skip_verify_sha256 ()) disk)
] >>= fun () ->
Lwt.async (fun () -> Lwt.async (fun () ->
let rec go () = let rec go () =
Time.sleep_ns (Duration.of_hour 1) >>= fun () -> Time.sleep_ns (Duration.of_hour 1) >>= fun () ->