2022-08-25 12:57:03 +00:00
|
|
|
open Lwt.Infix
|
|
|
|
|
2022-08-25 20:47:46 +00:00
|
|
|
(* Numeric status code for argument errors (judging by the name; no use is
   visible in this part of the file). 64 presumably mirrors sysexits'
   EX_USAGE -- TODO confirm. *)
let argument_error = 64
|
|
|
|
|
2024-07-15 10:22:57 +00:00
|
|
|
(** Command-line keys of the unikernel. Every value is a Cmdliner term
    registered through [Mirage_runtime.register_arg]. *)
module K = struct
  open Cmdliner

  (** [--check]: flag. *)
  let check =
    let info = Arg.info ~doc:"Only check the cache" ["check"] in
    Mirage_runtime.register_arg (Arg.value (Arg.flag info))

  (** [--verify-sha256]: flag. *)
  let verify_sha256 =
    let info =
      Arg.info
        ~doc:"Verify the SHA256 checksums of the cache contents, and \
              re-build the other checksum caches."
        ["verify-sha256"]
    in
    Mirage_runtime.register_arg (Arg.value (Arg.flag info))

  (** [--remote]: string, defaults to the opam-repository master branch. *)
  let remote =
    let info =
      Arg.info
        ~doc:"Remote repository url, use suffix #foo to specify a branch 'foo': \
              https://github.com/ocaml/opam-repository.git"
        ["remote"]
    in
    Mirage_runtime.register_arg
      (Arg.value
         (Arg.opt Arg.string
            "https://github.com/ocaml/opam-repository.git#master" info))

  (** [--parallel-downloads]: int, default 20. *)
  let parallel_downloads =
    let info =
      Arg.info ~doc:"Amount of parallel HTTP downloads" ["parallel-downloads"]
    in
    Mirage_runtime.register_arg (Arg.value (Arg.opt Arg.int 20 info))

  (** [--hook-url]: string, default ["update"]. *)
  let hook_url =
    let info =
      Arg.info ~doc:"URL to conduct an update of the git repository"
        ["hook-url"]
    in
    Mirage_runtime.register_arg (Arg.value (Arg.opt Arg.string "update" info))

  (** [--port]: int, default 80. *)
  let port =
    let info = Arg.info ~doc:"HTTP listen port." ["port"] in
    Mirage_runtime.register_arg (Arg.value (Arg.opt Arg.int 80 info))

  (** [--sectors-cache]: int64, default 4 * 2048 sectors. *)
  let sectors_cache =
    let info =
      Arg.info
        ~doc:"Number of sectors reserved for each checksum cache (md5, sha512). Only used with --initialize-disk."
        ["sectors-cache"]
    in
    Mirage_runtime.register_arg
      (Arg.value (Arg.opt Arg.int64 (Int64.mul 4L 2048L) info))

  (** [--sectors-git]: int64, default 40 * 2 * 1024 sectors. *)
  let sectors_git =
    let info =
      Arg.info
        ~doc:"Number of sectors reserved for git dump. Only used with --initialize-disk"
        ["sectors-git"]
    in
    Mirage_runtime.register_arg
      (Arg.value
         (Arg.opt Arg.int64 (Int64.mul 40L (Int64.mul 2L 1024L)) info))

  (** [--sectors-swap]: int64, default 1024 * 2048 sectors. *)
  let sectors_swap =
    let info =
      Arg.info
        ~doc:"Number of sectors reserved for swap. Only used with --initialize-disk"
        ["sectors-swap"]
    in
    Mirage_runtime.register_arg
      (Arg.value (Arg.opt Arg.int64 (Int64.mul 1024L 2048L) info))

  (** [--initialize-disk]: flag. *)
  let initialize_disk =
    let info =
      Arg.info
        ~doc:"Initialize the disk with a partition table. THIS IS DESTRUCTIVE!"
        ["initialize-disk"]
    in
    Mirage_runtime.register_arg (Arg.value (Arg.flag info))

  (** [--ignore-local-git]: flag. *)
  let ignore_local_git =
    let info =
      Arg.info ~doc:"Ignore restoring locally saved git repository."
        ["ignore-local-git"]
    in
    Mirage_runtime.register_arg (Arg.value (Arg.flag info))
end
|
|
|
|
|
2022-08-25 12:57:03 +00:00
|
|
|
module Make
|
2022-09-07 07:08:45 +00:00
|
|
|
(BLOCK : Mirage_block.S)
|
2022-08-25 12:57:03 +00:00
|
|
|
(Time : Mirage_time.S)
|
|
|
|
(Pclock : Mirage_clock.PCLOCK)
|
|
|
|
(Stack : Tcpip.Stack.V4V6)
|
2022-08-29 16:32:32 +00:00
|
|
|
(_ : sig end)
|
|
|
|
(HTTP : Http_mirage_client.S) = struct
|
2022-08-25 20:47:46 +00:00
|
|
|
|
2024-10-04 14:03:54 +00:00
|
|
|
(* Storage stack, all instantiated over the same partitioned block device. *)
module Part = Partitions.Make(BLOCK)

(* Read-write tar-backed key-value store holding the archives. *)
module KV = Tar_mirage.Make_KV_RW(Pclock)(Part)

(* Backing store of the persisted md5 / sha512 lookup tables. *)
module Cache = OneFFS.Make(Part)

(* Scratch space that downloads are appended to before being committed. *)
module Swap = Swapfs.Make(Part)

(* Git-backed key-value view of the opam repository. *)
module Store = Git_kv.Make(Pclock)

(* String-keyed maps and string sets used throughout. *)
module SM = Map.Make(String)
module SSet = Set.Make(String)
|
2022-08-25 20:47:46 +00:00
|
|
|
|
2024-10-03 12:37:37 +00:00
|
|
|
(* Short local names for the checksum helpers of [Archive_checksum]. *)
let compare_hash = Archive_checksum.Hash.compare

module HM = Archive_checksum.HM

let hash_to_string = Archive_checksum.Hash.to_string

let hash_of_string = Archive_checksum.Hash.of_string

(** [hex_to_key h] is the store key named by the hex encoding of [h]. *)
let hex_to_key h = h |> Ohex.encode |> Mirage_kv.Key.v
|
2023-01-23 08:13:42 +00:00
|
|
|
|
2022-08-25 20:47:46 +00:00
|
|
|
(** [hm_to_s hm] renders every binding of [hm] as a ["<hash>=<hex>\n"] line.
    Each line is prepended to the accumulator, so the result lists bindings
    in reverse fold order. *)
let hm_to_s hm =
  let prepend_line h v rest =
    Printf.sprintf "%s=%s\n%s" (hash_to_string h) (Ohex.encode v) rest
  in
  HM.fold prepend_line hm ""
|
|
|
|
|
|
|
|
module Git = struct
|
|
|
|
(** [find_contents store] walks [store] from the root and collects the keys
    of all values. Listing and lookup errors are logged and the affected
    subtree or entry is skipped. *)
let find_contents store =
  let rec walk dir found =
    Store.list store dir >>= function
    | Ok children -> Lwt_list.fold_left_s visit found children
    | Error e ->
      Logs.err (fun m -> m "error %a while listing %a"
                   Store.pp_error e Mirage_kv.Key.pp dir);
      Lwt.return found
  (* Classify one directory entry: keep values, descend into dictionaries. *)
  and visit found (child, _) =
    Store.exists store child >>= function
    | Ok (Some `Value) -> Lwt.return (child :: found)
    | Ok (Some `Dictionary) -> walk child found
    | Ok None ->
      Logs.warn (fun m -> m "no typ for %a" Mirage_kv.Key.pp child);
      Lwt.return found
    | Error e ->
      Logs.err (fun m -> m "error %a for exists %a" Store.pp_error e
                   Mirage_kv.Key.pp child);
      Lwt.return found
  in
  walk Mirage_kv.Key.empty []
|
2022-08-25 20:47:46 +00:00
|
|
|
|
|
|
|
(** [find_urls store] reads every file named ["opam"] in [store] and returns
    a map from archive URL to its checksums.

    - URLs without any checksum are ignored (with a warning).
    - When a URL occurs several times, the checksum maps are merged if they
      agree on every hash they share; otherwise the URL is dropped from the
      result (with a warning).
    - Files that cannot be read or whose processing raises are skipped; the
      exception is included in the warning. *)
let find_urls store =
  find_contents store >>= fun paths ->
  let opam_paths =
    List.filter (fun p -> String.equal (Mirage_kv.Key.basename p) "opam") paths
  in
  (* Fold one (url, checksums) pair into the accumulated map. *)
  let add_url acc (url, csums) =
    if HM.cardinal csums = 0 then begin
      Logs.warn (fun m -> m "no checksums for %s, ignoring" url);
      acc
    end else
      SM.update url (function
          | None -> Some csums
          | Some csums' ->
            let compatible =
              HM.for_all (fun h v ->
                  match HM.find_opt h csums with
                  | None -> true
                  | Some v' -> String.equal v v')
                csums'
            in
            if compatible then
              Some (HM.union (fun _h v _v' -> Some v) csums csums')
            else begin
              Logs.warn (fun m -> m "mismatching hashes for %s: %s vs %s"
                            url (hm_to_s csums') (hm_to_s csums));
              None
            end)
        acc
  in
  Lwt_list.fold_left_s (fun acc path ->
      Store.get store path >|= function
      | Error e ->
        Logs.warn (fun m -> m "Store.get: %a" Store.pp_error e);
        acc
      | Ok data ->
        (* The catch-all is a deliberate best effort: one broken opam file
           must not abort the scan. The exception is reported so the
           failure can be diagnosed. *)
        (try
           let url_csums =
             Opam_file.extract_urls (Mirage_kv.Key.to_string path) data
           in
           List.fold_left add_url acc url_csums
         with exn ->
           Logs.warn (fun m -> m "some error in %a (%s), ignoring"
                         Mirage_kv.Key.pp path (Printexc.to_string exn));
           acc))
    SM.empty opam_paths
|
2022-08-25 20:47:46 +00:00
|
|
|
end
|
|
|
|
|
2024-10-31 10:30:52 +00:00
|
|
|
(* Downloads in flight: url -> (start timestamp, bytes written so far). *)
let active_downloads = ref SM.empty

(** [add_to_active url ts] records [url] as started at [ts] with zero bytes
    written, replacing any previous entry for [url]. *)
let add_to_active url ts =
  let current = !active_downloads in
  active_downloads := SM.add url (ts, 0) current
|
2024-10-31 10:30:52 +00:00
|
|
|
|
|
|
|
(** [remove_active url] drops [url] from the table of active downloads. *)
let remove_active url =
  let current = !active_downloads in
  active_downloads := SM.remove url current
|
|
|
|
|
|
|
|
(** [active_add_bytes url written] adds [written] to the byte counter of the
    active download [url]. Does nothing when [url] is not active. *)
let active_add_bytes url written =
  Option.iter
    (fun (ts, previous) ->
       active_downloads :=
         SM.add url (ts, written + previous) !active_downloads)
    (SM.find_opt url !active_downloads)
|
|
|
|
|
|
|
|
(* Failed downloads: url -> (timestamp, reason). *)
let failed_downloads = ref SM.empty

(** [add_failed url ts reason] removes [url] from the active downloads and
    records it as failed at [ts] because of [reason]. *)
let add_failed url ts reason =
  remove_active url;
  let failures = !failed_downloads in
  failed_downloads := SM.add url (ts, reason) failures
|
|
|
|
|
2022-08-26 13:18:02 +00:00
|
|
|
module Disk = struct
|
|
|
|
(** State of the on-disk archive cache. *)
type t = {
  mutable md5s : string SM.t ;     (* md5 hex -> sha256 hex *)
  mutable sha512s : string SM.t ;  (* sha512 hex -> sha256 hex *)
  dev : KV.t ;                     (* archive store, keyed by sha256 hex *)
  dev_md5s : Cache.t ;             (* where [md5s] is persisted *)
  dev_sha512s : Cache.t ;          (* where [sha512s] is persisted *)
  dev_swap : Swap.t ;              (* scratch space for downloads *)
}

(** [empty dev dev_md5s dev_sha512s dev_swap] is a state over the given
    devices with both lookup tables empty. *)
let empty dev dev_md5s dev_sha512s dev_swap =
  {
    md5s = SM.empty ;
    sha512s = SM.empty ;
    dev = dev ;
    dev_md5s = dev_md5s ;
    dev_sha512s = dev_sha512s ;
    dev_swap = dev_swap ;
  }
|
2022-09-01 07:31:53 +00:00
|
|
|
|
2022-09-26 15:46:40 +00:00
|
|
|
(** [marshal_sm sm] serialises [sm] as a single version byte (currently 1)
    followed by its [Marshal] encoding. *)
let marshal_sm (sm : string SM.t) =
  let version_tag = String.make 1 (Char.chr 1) in
  let payload = Marshal.to_string sm [] in
  version_tag ^ payload
|
|
|
|
|
|
|
|
(** [unmarshal_sm s] decodes a map written by [marshal_sm].

    Returns [Error msg] when [s] is empty, carries an unknown version byte,
    or when the marshalled payload is truncated or malformed. It never
    raises on such input.

    NOTE(review): [Marshal] is not type-safe; a well-formed payload of a
    different type is not detected here. *)
let unmarshal_sm s =
  if String.length s = 0 then
    Error "Empty cache data"
  else
    match int_of_char s.[0] with
    | 1 ->
      (* [Marshal.from_string] raises [Invalid_argument] on short buffers
         and [Failure] on bad headers or data. *)
      (match (Marshal.from_string s 1 : string SM.t) with
       | sm -> Ok sm
       | exception (Failure msg | Invalid_argument msg) ->
         Error ("Corrupt cache data: " ^ msg))
    | version -> Error ("Unsupported version " ^ string_of_int version)
|
|
|
|
|
|
|
|
(** [update_caches t] persists the md5 table and then the sha512 table of
    [t]. Write failures are logged as warnings and otherwise ignored. *)
let update_caches t =
  let log_md5s = function
    | Ok () -> Logs.info (fun m -> m "Set 'md5s'")
    | Error e ->
      Logs.warn (fun m -> m "Failed to write 'md5s': %a" Cache.pp_write_error e)
  and log_sha512s = function
    | Ok () -> Logs.info (fun m -> m "Set 'sha512s'")
    | Error e ->
      Logs.warn (fun m -> m "Failed to write 'sha512s': %a" Cache.pp_write_error e)
  in
  Cache.write t.dev_md5s (marshal_sm t.md5s) >>= fun md5_outcome ->
  log_md5s md5_outcome;
  Cache.write t.dev_sha512s (marshal_sm t.sha512s) >|= log_sha512s
|
|
|
|
|
2022-09-27 14:25:57 +00:00
|
|
|
(** [find_key t h key] resolves a checksum [key] of kind [h] to the key under
    which the archive is stored. SHA256 keys are used as they are; MD5 and
    SHA512 keys are looked up in the tables of [t]. Any other hash kind, or a
    missing table entry, gives [Error `Not_found].

    [key] must consist of exactly one segment (asserted). *)
let find_key t h key =
  assert (List.length (Mirage_kv.Key.segments key) = 1);
  let via table =
    Option.map Mirage_kv.Key.v (SM.find_opt (Mirage_kv.Key.basename key) table)
  in
  let resolved =
    match h with
    | `SHA256 -> Some key
    | `MD5 -> via t.md5s
    | `SHA512 -> via t.sha512s
    | _ -> None
  in
  match resolved with
  | Some x -> Ok x
  | None -> Error `Not_found
|
|
|
|
|
|
|
|
(** [read_chunked t h v f a] folds [f] over the archive identified by the
    checksum [v] of kind [h], reading it in pieces of 4096 bytes, starting
    from accumulator [a].

    Returns [Error (`Not_found _)] when the key cannot be resolved or its
    size cannot be read, and the store's error when reading a piece fails. *)
let read_chunked t h v f a =
  match find_key t h v with
  | Error `Not_found -> Lwt.return (Error (`Not_found v))
  | Ok key ->
    let chunk_size = 4096 in
    let log_read_error e =
      Logs.err (fun m -> m "error %a while reading %s %a"
                   KV.pp_error e (hash_to_string h) Mirage_kv.Key.pp v)
    in
    KV.size t.dev key >>= function
    | Error e ->
      log_read_error e;
      Lwt.return (Error (`Not_found key))
    | Ok len ->
      let rec consume acc offset =
        if not (offset < len) then
          Lwt.return (Ok acc)
        else
          KV.get_partial t.dev key ~offset ~length:chunk_size >>= function
          | Error e ->
            log_read_error e;
            Lwt.return (Error e)
          | Ok data ->
            f acc data >>= fun acc ->
            consume acc Optint.Int63.(add offset (of_int chunk_size))
      in
      consume a Optint.Int63.zero
|
2022-09-27 14:25:57 +00:00
|
|
|
|
2024-11-04 15:13:36 +00:00
|
|
|
(** [init_write t csums] prepares a download: it returns the first component
    of [Archive_checksum.init_write csums] together with [Ok] of the second
    component paired with an empty swap handle on the swap device of [t]. *)
let init_write t csums =
  let selected, digests = Archive_checksum.init_write csums in
  let handle = Swap.empty t.dev_swap in
  (selected, Ok (digests, handle))
|
2023-01-25 10:34:31 +00:00
|
|
|
|
2024-10-31 10:30:52 +00:00
|
|
|
(** [write_partial t (hash, csum) url response r data] handles one chunk of a
    download body. If [r] is an error it is passed through unchanged.
    Otherwise [data] is fed into the running digests, counted against the
    active download [url], and appended to the swap handle; a swap failure
    is returned as [Error (`Swap _)].

    The first two arguments and [response] are not used here. *)
let write_partial _t (_hash, _csum) url =
  (* XXX: we may be in trouble if different hash functions are used for the same archive *)
  fun _response r data ->
    match r with
    | Error e -> Lwt.return (Error e)
    | Ok (digests, swap) ->
      let digests = Archive_checksum.update_digests digests data in
      active_add_bytes url (String.length data);
      Swap.append swap data >|= fun appended ->
      match appended with
      | Ok () -> Ok (digests, swap)
      | Error swap_err -> Error (`Swap swap_err)
|
2023-01-25 10:34:31 +00:00
|
|
|
|
|
|
|
(** [check_csums_digests csums digests] is [true] iff the computed [digests]
    and the expected [csums] have at least one hash kind in common and agree
    on every hash kind they have in common. *)
let check_csums_digests csums digests =
  let computed = Archive_checksum.digests_to_hm digests in
  let shared =
    List.filter (fun (h, _) -> HM.mem h csums) (HM.bindings computed)
  in
  match shared with
  | [] -> false
  | _ :: _ ->
    List.for_all (fun (h, csum) -> String.equal csum (HM.find h csums)) shared
|
|
|
|
|
2024-11-01 13:35:08 +00:00
|
|
|
(** [set_from_handle dev dest h] allocates [dest] on [dev] with the size of
    the swap handle [h] and copies the swap contents into it in pieces of at
    most 4096 bytes. Swap read failures are returned as [Error (`Swap _)],
    allocation and write failures as [Error (`Write_error _)]. *)
let set_from_handle dev dest h =
  let rec copy_from offset =
    if offset = Swap.size h then
      Lwt.return_ok ()
    else begin
      let length = Int64.(to_int (min 4096L (sub (Swap.size h) offset))) in
      Swap.get_partial h ~offset ~length >>= function
      | Error e -> Lwt.return (Error (`Swap e))
      | Ok data ->
        KV.set_partial dev dest ~offset:(Optint.Int63.of_int64 offset) data
        >>= function
        | Error e -> Lwt.return (Error (`Write_error e))
        | Ok () -> copy_from Int64.(add offset (of_int length))
    end
  in
  let total = Optint.Int63.of_int64 (Swap.size h) in
  KV.allocate dev dest total >>= function
  | Error e -> Lwt.return (Error (`Write_error e))
  | Ok () -> copy_from 0L
|
|
|
|
|
2024-11-04 15:13:36 +00:00
|
|
|
(** [finalize_write t (hash, csum) ~url swap csums digests] completes a
    download whose body has been collected in [swap].

    - If [digests] match the expected [csums] (see [check_csums_digests]),
      the data is copied to ["pending/<sha256>"] and then renamed to
      ["<sha256>"]. On success [url] is removed from the active downloads
      and the md5 / sha512 tables of [t] are extended; on failure the error
      is logged and [url] is recorded as failed.
    - Otherwise [url] is recorded as failed with a "Bad checksum" reason
      naming [hash] and the expected [csum].

    The swap handle is released on every path, including the bad-checksum
    one. The result is always [unit]. *)
let finalize_write t (hash, csum) ~url swap csums digests =
  if check_csums_digests csums digests then begin
    let sha256 = Ohex.encode Digestif.SHA256.(to_raw_string (get digests.sha256))
    and md5 = Ohex.encode Digestif.MD5.(to_raw_string (get digests.md5))
    and sha512 = Ohex.encode Digestif.SHA512.(to_raw_string (get digests.sha512)) in
    let dest = Mirage_kv.Key.v sha256 in
    Logs.info (fun m -> m "downloaded %s, now writing" url);
    (* Write under a temporary name first so a partially written archive is
       never visible under its final key. *)
    let temp = Mirage_kv.Key.(v "pending" // dest) in
    Lwt_result.bind
      (Lwt.finalize (fun () -> set_from_handle t.dev temp swap)
         (fun () -> Swap.free swap))
      (fun () -> KV.rename t.dev ~source:temp ~dest
                 |> Lwt_result.map_error (fun e -> `Write_error e))
    >|= function
    | Ok () ->
      remove_active url;
      t.md5s <- SM.add md5 sha256 t.md5s;
      t.sha512s <- SM.add sha512 sha256 t.sha512s
    | Error e ->
      let pp_error ppf = function
        | `Write_error e -> KV.pp_write_error ppf e
        | `Swap e -> Swap.pp_error ppf e
      in
      let reason = Fmt.str "Write failure for %s: %a" url pp_error e in
      Logs.err (fun m -> m "%s" reason);
      add_failed url (Ptime.v (Pclock.now_d_ps ())) reason
  end else begin
    let reason =
      Fmt.str "Bad checksum %s:%s: computed %s expected %s" url
        (hash_to_string hash)
        (Ohex.encode (Archive_checksum.get digests hash))
        (Ohex.encode csum)
    in
    add_failed url (Ptime.v (Pclock.now_d_ps ())) reason;
    Logs.err (fun m -> m "%s" reason);
    (* The rejected data is no longer needed: give the swap space back. *)
    Swap.free swap
  end
|
2023-01-25 10:34:31 +00:00
|
|
|
|
2022-08-26 13:18:02 +00:00
|
|
|
(* on disk, we use a flat file system where the filename is the sha256 of the data *)
|
2024-11-01 13:35:08 +00:00
|
|
|
(** [init ~verify_sha256 dev dev_md5s dev_sha512s dev_swap] builds the disk
    state from the devices.

    - Unless [verify_sha256] is set, the md5 and sha512 tables are loaded
      from their caches; read or decode problems are logged and the table
      is left empty.
    - Every value at the top level of [dev] is then inspected. An archive is
      read only if its SHA256 must be verified ([verify_sha256]) or it is
      missing from the md5 or sha512 table; the missing digests are computed
      in one pass and added to the tables. A SHA256 mismatch is only logged.
    - A full major GC is triggered on every tenth entry.
    - Finally the tables are written back with [update_caches].

    Fails with an assertion if [dev] cannot be listed. *)
let init ~verify_sha256 dev dev_md5s dev_sha512s dev_swap =
  KV.list dev Mirage_kv.Key.empty >>= function
  | Error e -> Logs.err (fun m -> m "error %a listing kv" KV.pp_error e); assert false
  | Ok entries ->
    let t = empty dev dev_md5s dev_sha512s dev_swap in
    Cache.read t.dev_md5s >>= fun r ->
    (match r with
     | Ok Some s ->
       if not verify_sha256 then
         (match unmarshal_sm s with
          | Ok md5s -> t.md5s <- md5s
          | Error msg ->
            Logs.warn (fun m -> m "Error decoding md5s cache: %s" msg))
     | Ok None -> Logs.debug (fun m -> m "No md5s cached")
     | Error e -> Logs.warn (fun m -> m "Error reading md5s cache: %a" Cache.pp_error e));
    Cache.read t.dev_sha512s >>= fun r ->
    (match r with
     | Ok Some s ->
       if not verify_sha256 then
         (match unmarshal_sm s with
          | Ok sha512s -> t.sha512s <- sha512s
          | Error msg ->
            Logs.warn (fun m -> m "Error decoding sha512s cache: %s" msg))
     | Ok None -> Logs.debug (fun m -> m "No sha512s cached")
     | Error e -> Logs.warn (fun m -> m "Error reading sha512s cache: %a" Cache.pp_error e));
    (* sha256 names already covered by the loaded tables *)
    let md5s = SSet.of_list (List.map snd (SM.bindings t.md5s))
    and sha512s = SSet.of_list (List.map snd (SM.bindings t.sha512s)) in
    let idx = ref 1 in
    Lwt_list.iter_s (fun (path, typ) ->
        if !idx mod 10 = 0 then Gc.full_major () ;
        (* advance the entry counter so the periodic GC actually fires *)
        incr idx ;
        match typ with
        | `Dictionary ->
          Logs.warn (fun m -> m "unexpected dictionary at %a" Mirage_kv.Key.pp path);
          Lwt.return_unit
        | `Value ->
          let open Digestif in
          (* Each [*_final] is [Some f] when that digest has to be computed
             for this entry; [f] consumes the finished digest context. *)
          let sha256_final =
            if verify_sha256 then
              let f s =
                let digest = SHA256.(to_raw_string (get s)) in
                if not (String.equal (Mirage_kv.Key.basename path) (Ohex.encode digest)) then
                  Logs.err (fun m -> m "corrupt SHA256 data for %a, \
                                        computed %s (should remove)"
                               Mirage_kv.Key.pp path (Ohex.encode digest))
              in
              Some f
            else
              None
          and md5_final =
            if not (SSet.mem (Mirage_kv.Key.basename path) md5s) then
              let f s =
                let digest = MD5.(to_raw_string (get s)) in
                t.md5s <- SM.add (Ohex.encode digest) (Mirage_kv.Key.basename path) t.md5s
              in
              Some f
            else
              None
          and sha512_final =
            if not (SSet.mem (Mirage_kv.Key.basename path) sha512s) then
              let f s =
                let digest = SHA512.(to_raw_string (get s)) in
                t.sha512s <- SM.add (Ohex.encode digest) (Mirage_kv.Key.basename path) t.sha512s
              in
              Some f
            else
              None
          in
          match sha256_final, md5_final, sha512_final with
          | None, None, None -> Lwt.return_unit
          | _ ->
            read_chunked t `SHA256 path
              (fun (sha256, md5, sha512) data ->
                 Lwt.return
                   (Option.map (fun t -> SHA256.feed_string t data) sha256,
                    Option.map (fun t -> MD5.feed_string t data) md5,
                    Option.map (fun t -> SHA512.feed_string t data) sha512))
              (Option.map (fun _ -> SHA256.empty) sha256_final,
               Option.map (fun _ -> MD5.empty) md5_final,
               Option.map (fun _ -> SHA512.empty) sha512_final) >|= function
            | Error e ->
              Logs.err (fun m -> m "error %a of %a while computing digests"
                           KV.pp_error e Mirage_kv.Key.pp path)
            | Ok (sha256, md5, sha512) ->
              Option.iter (fun f -> f (Option.get sha256)) sha256_final;
              Option.iter (fun f -> f (Option.get md5)) md5_final;
              Option.iter (fun f -> f (Option.get sha512)) sha512_final;
              Logs.info (fun m -> m "added %a" Mirage_kv.Key.pp path))
      entries >>= fun () ->
    update_caches t >|= fun () ->
    t
|
|
|
|
|
2022-08-26 13:34:20 +00:00
|
|
|
(** [exists t h v] is [true] iff the checksum [v] of kind [h] resolves to a
    stored value. Unresolvable keys, dictionaries and store errors all give
    [false]; the latter two are logged. *)
let exists t h v =
  match find_key t h v with
  | Error _ -> Lwt.return false
  | Ok key ->
    KV.exists t.dev key >|= fun outcome ->
    match outcome with
    | Ok (Some `Value) -> true
    | Ok None -> false
    | Ok (Some `Dictionary) ->
      Logs.err (fun m -> m "unexpected dictionary for %s %a"
                   (hash_to_string h) Mirage_kv.Key.pp v);
      false
    | Error e ->
      Logs.err (fun m -> m "exists %s %a returned %a"
                   (hash_to_string h) Mirage_kv.Key.pp v KV.pp_error e);
      false
|
|
|
|
|
2022-09-04 08:01:45 +00:00
|
|
|
(** [last_modified t h v] is the store's modification time of the archive
    with checksum [v] of kind [h]. Every failure, whether an unresolvable
    key or a store error (logged), is reported as [Error `Not_found]. *)
let last_modified t h v =
  match find_key t h v with
  | Error e -> Lwt.return (Error e)
  | Ok key ->
    let as_not_found e =
      Logs.err (fun m -> m "error %a while last_modified %s %a"
                   KV.pp_error e (hash_to_string h) Mirage_kv.Key.pp v);
      `Not_found
    in
    KV.last_modified t.dev key >|= Result.map_error as_not_found
|
2022-09-27 08:46:14 +00:00
|
|
|
|
|
|
|
(** [size t h v] is the stored size of the archive with checksum [v] of kind
    [h]. Every failure, whether an unresolvable key or a store error
    (logged), is reported as [Error `Not_found]. *)
let size t h v =
  match find_key t h v with
  | Error e -> Lwt.return (Error e)
  | Ok key ->
    KV.size t.dev key >|= fun outcome ->
    match outcome with
    | Ok bytes -> Ok bytes
    | Error e ->
      Logs.err (fun m -> m "error %a while size %s %a"
                   KV.pp_error e (hash_to_string h) Mirage_kv.Key.pp v);
      Error `Not_found
|
|
|
|
end
|
2022-09-04 08:01:45 +00:00
|
|
|
|
|
|
|
module Tarball = struct
|
2024-07-31 12:54:26 +00:00
|
|
|
(* Brand connecting Lwt promises with the higher-kinded [Tar.io] type.
   [inj] and [prj] are identity casts (["%identity"]) whose types are
   restricted by the signature so that they are inverses of each other. *)
module High : sig
  type t
  type 'a s = 'a Lwt.t

  external inj : 'a s -> ('a, t) Tar.io = "%identity"
  external prj : ('a, t) Tar.io -> 'a s = "%identity"
end = struct
  type t
  type 'a s = 'a Lwt.t

  external inj : 'a -> 'b = "%identity"
  external prj : 'a -> 'b = "%identity"
end
|
|
|
|
|
2024-07-31 12:54:26 +00:00
|
|
|
(** [to_buffer buf t] interprets the Tar computation [t], appending all
    written data to [buf]. Only writing computations are supported: a read
    or seek request fails with an assertion. *)
let to_buffer buf t =
  let rec run : type a. (a, [> `Msg of string ] as 'err, High.t) Tar.t -> (a, 'err) result Lwt.t
    = function
      | Tar.Return value -> Lwt.return value
      | Tar.High value -> High.prj value
      | Tar.Bind (x, f) ->
        Lwt_result.bind (run x) (fun value -> run (f value))
      | Tar.Write str ->
        Buffer.add_string buf str;
        Lwt.return_ok ()
      (* input operations make no sense when producing an archive *)
      | Tar.Read _ -> assert false
      | Tar.Really_read _ -> assert false
      | Tar.Seek _ -> assert false
  in
  run t
|
|
|
|
|
|
|
|
(** [once data] is a producer that yields [Some data] on its first call and
    [None] on every later call, wrapped as a [Tar.High] Lwt result. *)
let once data =
  let pending = ref (Some data) in
  fun () ->
    let next = !pending in
    pending := None;
    Tar.High (High.inj (Lwt.return_ok next))
|
|
|
|
|
|
|
|
(** [entries_of_git ~mtime store repo] is a producer of tar entries, one per
    value found in [store]. The content of the key ["repo"] is replaced by
    [repo]. Every entry is a Ustar header (mode 0o644, uid/gid 0,
    modification time [mtime]) plus its data; values that cannot be read are
    skipped. *)
let entries_of_git ~mtime store repo =
  Git.find_contents store >>= fun paths ->
  let mod_time = Int64.of_int mtime in
  let entry_of path =
    Store.get store path >|= fun outcome ->
    match outcome with
    | Error _ -> None
    | Ok stored ->
      let data =
        if Mirage_kv.Key.(equal path (v "repo")) then repo else stored
      in
      let hdr =
        Tar.Header.make ~file_mode:0o644 ~mod_time ~user_id:0 ~group_id:0
          (Mirage_kv.Key.to_string path)
          (Int64.of_int (String.length data))
      in
      Some (Some Tar.Header.Ustar, hdr, once data)
  in
  let entries = Lwt_stream.filter_map_s entry_of (Lwt_stream.of_list paths) in
  Lwt.return (fun () ->
      Tar.High (High.inj (Lwt_stream.get entries >|= Result.ok)))
|
2022-09-04 08:01:45 +00:00
|
|
|
|
|
|
|
(** [of_git repo store] is the gzipped Ustar archive (gzip level 4) of all
    values in [store], with the ["repo"] file replaced by [repo]. The
    current time is used as modification time (0 if it does not fit an
    [int]).
    @raise Failure if producing the archive reports a [`Msg] error. *)
let of_git repo store =
  let mtime =
    let now = Ptime.v (Pclock.now_d_ps ()) in
    Option.value ~default:0 Ptime.(Span.to_int_s (to_span now))
  in
  entries_of_git ~mtime store repo >>= fun entries ->
  let tar = Tar.out ~level:Ustar entries in
  let tar_gz =
    Tar_gz.out_gzipped ~level:4 ~mtime:(Int32.of_int mtime) Gz.Unix tar
  in
  let buf = Buffer.create 1024 in
  to_buffer buf tar_gz >|= fun outcome ->
  match outcome with
  | Ok () -> Buffer.contents buf
  | Error (`Msg msg) -> failwith msg
|
2022-09-04 08:01:45 +00:00
|
|
|
end
|
|
|
|
|
|
|
|
module Serve = struct
|
|
|
|
(** [ptime_to_http_date ptime] formats [ptime] in the form
    ["Sun, 06 Nov 1994 08:49:37 GMT"]. *)
let ptime_to_http_date ptime =
  let (y, m, d), ((hh, mm, ss), _) = Ptime.to_date_time ptime in
  let day_name =
    match Ptime.weekday ptime with
    | `Mon -> "Mon" | `Tue -> "Tue" | `Wed -> "Wed" | `Thu -> "Thu"
    | `Fri -> "Fri" | `Sat -> "Sat" | `Sun -> "Sun"
  in
  let month_names =
    [| "Jan" ; "Feb" ; "Mar" ; "Apr" ; "May" ; "Jun" ;
       "Jul" ; "Aug" ; "Sep" ; "Oct" ; "Nov" ; "Dec" |]
  in
  (* [m] is 1-based *)
  Printf.sprintf "%s, %02d %s %04d %02d:%02d:%02d GMT"
    day_name d month_names.(m - 1) y hh mm ss
|
|
|
|
|
2022-09-26 09:26:58 +00:00
|
|
|
(** [commit_id git_kv] is the digest of the root of [git_kv]. If it cannot
    be obtained, the error is logged and the program exits with status 2. *)
let commit_id git_kv =
  Store.digest git_kv Mirage_kv.Key.empty >|= function
  | Ok digest -> digest
  | Error e ->
    Logs.err (fun m -> m "%a" Store.pp_error e);
    exit 2
|
2022-09-04 08:01:45 +00:00
|
|
|
|
2024-07-15 10:22:57 +00:00
|
|
|
(** [repo remote commit] is the content of the opam [repo] file. The upstream
    is [remote] with everything from the first ['#'] on replaced by
    ["#" ^ commit]; [commit] also serves as the stamp. *)
let repo remote commit =
  let upstream =
    match String.index_opt remote '#' with
    | None -> remote
    | Some i -> String.sub remote 0 i
  in
  Fmt.str
    {|opam-version: "2.0"
upstream: "%s#%s"
archive-mirrors: "cache"
stamp: %S
|} upstream commit commit
|
|
|
|
|
2022-09-26 09:26:58 +00:00
|
|
|
(** [modified git_kv] is the modification time of the root of [git_kv] as an
    HTTP date. The current time is used if the store reports an error. *)
let modified git_kv =
  Store.last_modified git_kv Mirage_kv.Key.empty >|= fun r ->
  let stamp =
    match r with
    | Ok ts -> ts
    | Error _ -> Ptime.v (Pclock.now_d_ps ())
  in
  ptime_to_http_date stamp
|
2022-09-04 08:01:45 +00:00
|
|
|
|
|
|
|
(** Snapshot of the repository data that is served. All fields are replaced
    together by [update_git]. *)
type t = {
  mutable commit_id : string ;  (* digest of the git store root *)
  mutable modified : string ;   (* HTTP date of the last modification *)
  mutable repo : string ;       (* content of the [repo] file *)
  mutable index : string ;      (* gzipped tarball of the repository *)
}
|
|
|
|
|
2024-07-15 10:22:57 +00:00
|
|
|
(** [create remote git_kv] computes the initial snapshot from [git_kv]:
    commit id, modification date, [repo] file and index tarball. *)
let create remote git_kv =
  commit_id git_kv >>= fun head ->
  modified git_kv >>= fun stamp ->
  let repo_file = repo remote head in
  Tarball.of_git repo_file git_kv >|= fun tarball ->
  { commit_id = head ; modified = stamp ; repo = repo_file ; index = tarball }
|
|
|
|
|
2022-09-04 12:51:41 +00:00
|
|
|
(* Serialises repository updates. *)
let update_lock = Lwt_mutex.create ()

(** [update_git ~remote t git_kv] pulls [git_kv] while holding [update_lock].

    - Pull failure: logged, result is [None].
    - No changes: result is [Some []]; [t] is left untouched.
    - Otherwise all fields of [t] are recomputed and the changes are
      returned as [Some changes]. *)
let update_git ~remote t git_kv =
  let refresh changes =
    commit_id git_kv >>= fun head ->
    modified git_kv >>= fun stamp ->
    Logs.info (fun m -> m "git: %s" head);
    let repo_file = repo remote head in
    Tarball.of_git repo_file git_kv >|= fun tarball ->
    t.commit_id <- head ;
    t.modified <- stamp ;
    t.repo <- repo_file ;
    t.index <- tarball ;
    Some changes
  in
  Lwt_mutex.with_lock update_lock (fun () ->
      Logs.info (fun m -> m "pulling the git repository");
      Git_kv.pull git_kv >>= function
      | Ok [] ->
        Logs.info (fun m -> m "git changes are empty");
        Lwt.return (Some [])
      | Ok changes -> refresh changes
      | Error (`Msg msg) ->
        Logs.err (fun m -> m "error %s while updating git" msg);
        Lwt.return None)
|
2022-09-04 12:51:41 +00:00
|
|
|
|
2024-10-31 10:30:52 +00:00
|
|
|
(** [status disk] renders the HTML status page:
    - the number of entries in the md5 table of [disk] (reported as validated
      archives) and the free bytes of the archive store,
    - the active downloads with start time and bytes written,
    - the failed downloads with time and reason.

    URLs and failure reasons originate from repository content and error
    messages, so they are HTML-escaped before being embedded. *)
let status disk =
  (* Escape the characters that are significant in HTML text and
     attribute values. *)
  let html_escape s =
    let buf = Buffer.create (String.length s) in
    String.iter (function
        | '&' -> Buffer.add_string buf "&amp;"
        | '<' -> Buffer.add_string buf "&lt;"
        | '>' -> Buffer.add_string buf "&gt;"
        | '"' -> Buffer.add_string buf "&quot;"
        | '\'' -> Buffer.add_string buf "&#39;"
        | c -> Buffer.add_char buf c)
      s;
    Buffer.contents buf
  in
  let timestamp ts = Ptime.to_rfc3339 ?tz_offset_s:None ts in
  let section title items =
    "<h2>" ^ title ^ "</h2><ul>" ^ String.concat "" items ^ "</ul>"
  in
  let archive_stats =
    Fmt.str "<ul><li>%u validated archives on disk</li><li>%Lu bytes free</li></ul>"
      (SM.cardinal disk.Disk.md5s)
      (KV.free disk.Disk.dev)
  in
  (* Consing during the fold lists the entries in descending URL order,
     exactly as the previous prepend-based concatenation did. *)
  let active_html =
    SM.fold (fun url (ts, bytes_written) acc ->
        ("<li>" ^ timestamp ts ^ ": " ^ html_escape url ^ " "
         ^ string_of_int bytes_written ^ " bytes written to disk</li>")
        :: acc)
      !active_downloads []
    |> section "Active downloads"
  in
  let failed_html =
    SM.fold (fun url (ts, reason) acc ->
        ("<li>" ^ timestamp ts ^ ": " ^ html_escape url ^ " "
         ^ html_escape reason ^ "</li>")
        :: acc)
      !failed_downloads []
    |> section "Failed downloads"
  in
  "<html><head><title>Opam-mirror status page</title></head><body><h1>Opam mirror status</h1><div>"
  ^ String.concat "</div><div>" [ archive_stats ; active_html ; failed_html ]
  ^ "</div></body></html>"
|
|
|
|
|
2022-09-04 08:01:45 +00:00
|
|
|
(** [not_modified request (modified, etag)] decides whether [request] may be
    answered with "304 Not Modified".

    - If an [if-modified-since] header is present, the result is whether it
      equals [modified]; [if-none-match] is not consulted in that case.
    - Otherwise, if [if-none-match] is present, the result is whether [etag]
      is a member of its comma-separated list. Members are compared after
      trimming surrounding whitespace, so ["a, b"] matches [b].
    - Otherwise [false]. *)
let not_modified request (modified, etag) =
  let header name = Httpaf.Headers.get request.Httpaf.Request.headers name in
  match header "if-modified-since" with
  | Some ts -> String.equal ts modified
  | None ->
    match header "if-none-match" with
    | None -> false
    | Some etags ->
      String.split_on_char ',' etags
      |> List.exists (fun candidate ->
          String.equal (String.trim candidate) etag)
|
|
|
|
|
|
|
|
(* [not_found reqd path] answers [reqd] with a 404 response whose body is
   "Resource not found " followed by [path]. The only header set is
   "content-length". *)
let not_found reqd path =
  let body = "Resource not found " ^ path in
  let length = string_of_int (String.length body) in
  let response =
    Httpaf.Response.create
      ~headers:(Httpaf.Headers.of_list [ "content-length", length ])
      `Not_found
  in
  Httpaf.Reqd.respond_with_string reqd response body
|
|
|
|
|
|
|
|
(* [respond_with_empty reqd resp] sends [resp] with an empty body. A
   "connection: close" header is added unless [resp] already carries a
   "connection" header. *)
let respond_with_empty reqd resp =
  let headers =
    Httpaf.Headers.add_unless_exists
      resp.Httpaf.Response.headers "connection" "close"
  in
  Httpaf.Reqd.respond_with_string reqd
    { resp with Httpaf.Response.headers = headers }
    ""
|
|
|
|
|
|
|
|
(* From the OPAM manual, all we need:
|
|
|
|
/repo -- repository configuration file
|
|
|
|
/cache -- cached archives
|
|
|
|
/index.tar.gz -- archive containing the whole repository contents
|
|
|
|
*)
|
|
|
|
(* may include "announce: [ string { filter } ... ]" *)
|
|
|
|
(* use Key_gen.remote for browse & upstream *)
|
|
|
|
|
|
|
|
(* for repo and index.tar.gz:
|
|
|
|
if Last_modified.not_modified request then
|
|
|
|
let resp = Httpaf.Response.create `Not_modified in
|
|
|
|
respond_with_empty reqd resp
|
|
|
|
else *)
|
2022-09-26 19:51:42 +00:00
|
|
|
(* [dispatch t store hook_url update _flow _conn reqd] is the HTTP request
   handler. The request target is split on '/' and routed as follows:

   - "/<hook_url>": starts [update] in the background and answers
     "Update in progress";
   - "/status": answers with the HTML page built by [status store];
   - "/repo" and "/index.tar.gz": answer with [t.repo] / [t.index], or with
     304 when [not_modified] holds for [(t.modified, t.commit_id)];
   - "/cache/<hash-algo>/<first-2-hash-characters>/<hash>": streams the
     archive from [store] (the two-character directory component is ignored);
   - anything else: 404 via [not_found]. *)
let dispatch t store hook_url update _flow _conn reqd =
  let request = Httpaf.Reqd.request reqd in
  let target = request.Httpaf.Request.target in
  Logs.info (fun f -> f "requested %s" target);
  (* Answer 200 with [data]; "content-length" is appended after [leading]. *)
  let reply_ok leading data =
    let headers =
      Httpaf.Headers.of_list
        (leading @ [ "content-length", string_of_int (String.length data) ])
    in
    Httpaf.Reqd.respond_with_string reqd
      (Httpaf.Response.create ~headers `OK) data
  in
  (* Headers shared by all responses tied to the current git commit. *)
  let versioned mime_type =
    [ "content-type", mime_type ;
      "etag", t.commit_id ;
      "last-modified", t.modified ]
  in
  let reply_not_modified () =
    respond_with_empty reqd (Httpaf.Response.create `Not_modified)
  in
  (* Serve commit-versioned [data], honouring conditional request headers. *)
  let serve_if_changed mime_type data =
    if not_modified request (t.modified, t.commit_id) then
      reply_not_modified ()
    else
      reply_ok (versioned mime_type) data
  in
  match String.split_on_char '/' target with
  | [ ""; x ] when String.equal x hook_url ->
    Lwt.async update;
    reply_ok (versioned "text/plain") "Update in progress"
  | [ ""; "status" ] ->
    let page = status store in
    reply_ok [ "content-type", "text/html" ] page
  | [ ""; "repo" ] ->
    serve_if_changed "text/plain" t.repo
  | [ ""; "index.tar.gz" ] ->
    (* deliver prepared tarball *)
    serve_if_changed "application/octet-stream" t.index
  | [ ""; "cache"; hash_algo; _; hash ] ->
    (* `<hash-algo>/<first-2-hash-characters>/<hash>` *)
    (match hash_of_string hash_algo with
     | Error `Msg msg ->
       Logs.warn (fun m -> m "error decoding hash algo: %s" msg);
       not_found reqd target
     | Ok h ->
       let key = Mirage_kv.Key.v hash in
       let stream () =
         Disk.last_modified store h key >|= (function
             | Ok v -> ptime_to_http_date v
             | Error _ ->
               (* fall back to the repository's modification date *)
               Logs.warn (fun m -> m "error retrieving last modified");
               t.modified)
         >>= fun last_modified ->
         let etag = Mirage_kv.Key.basename key in
         if not_modified request (last_modified, etag) then begin
           reply_not_modified ();
           Lwt.return_unit
         end else
           Disk.size store h key >>= function
           | Error _ ->
             Logs.warn (fun m -> m "error retrieving size");
             not_found reqd target;
             Lwt.return_unit
           | Ok size ->
             let headers =
               Httpaf.Headers.of_list [
                 "content-type", "application/octet-stream" ;
                 "etag", etag ;
                 "last-modified", last_modified ;
                 "content-length", Optint.Int63.to_string size ;
               ]
             in
             let body =
               Httpaf.Reqd.respond_with_streaming reqd
                 (Httpaf.Response.create ~headers `OK)
             in
             (* Write one chunk and wait for it to be flushed before the
                next chunk is read from disk. *)
             let push () chunk =
               let flushed, notify = Lwt.task () in
               (* FIXME: catch exception when body is closed *)
               Httpaf.Body.write_string body chunk;
               Httpaf.Body.flush body (Lwt.wakeup notify);
               flushed
             in
             Disk.read_chunked store h key push () >|= fun _ ->
             Httpaf.Body.close_writer body
       in
       Lwt.async stream)
  | _ ->
    Logs.warn (fun m -> m "unknown request %s" target);
    not_found reqd target
|
|
|
|
|
2022-08-26 13:18:02 +00:00
|
|
|
end
|
|
|
|
|
2024-07-15 10:22:57 +00:00
|
|
|
(* [download_archives parallel_downloads disk http_client store] downloads
   every archive referenced by [store] (as reported by [Git.find_urls]) that
   is not yet present on [disk] under all of its checksums.

   At most [parallel_downloads] downloads run at the same time (enforced with
   an [Lwt_pool]). Each started download is registered with [add_to_active];
   each failure is logged and registered with [add_failed]. Once all URLs
   have been processed, [Disk.update_caches] is called and a summary is
   logged. *)
let download_archives parallel_downloads disk http_client store =
  (* FIXME: handle resuming partial downloads *)
  Git.find_urls store >>= fun urls ->
  let pool = Lwt_pool.create parallel_downloads (Fun.const Lwt.return_unit) in
  let idx = ref 0 in
  Lwt_list.iter_p (fun (url, csums) ->
      Lwt_pool.use pool @@ fun () ->
      (* Register [reason] as the failure of [url] for the status page. *)
      let failed reason =
        add_failed url (Ptime.v (Pclock.now_d_ps ())) reason;
        Lwt.return_unit
      in
      (* [true] iff the archive exists on disk under every known checksum;
         stops querying the disk after the first miss. *)
      HM.fold (fun h v r ->
          r >>= function
          | true -> Disk.exists disk h (hex_to_key v)
          | false -> Lwt.return false)
        csums (Lwt.return true) >>= function
      | true ->
        Logs.debug (fun m -> m "ignoring %s (already present)" url);
        Lwt.return_unit
      | false ->
        incr idx;
        (* force a full major collection on every tenth started download *)
        if !idx mod 10 = 0 then Gc.full_major () ;
        Logs.info (fun m -> m "downloading %s" url);
        let quux, body_init = Disk.init_write disk csums in
        add_to_active url (Ptime.v (Pclock.now_d_ps ()));
        Http_mirage_client.request http_client url
          (Disk.write_partial disk quux url) body_init >>= function
        | Ok (resp, r) ->
          begin match r with
            | Error `Bad_response ->
              Logs.warn (fun m -> m "%s: %a (reason %s)"
                            url H2.Status.pp_hum resp.status resp.reason);
              failed (Fmt.str "%a %s" H2.Status.pp_hum resp.status resp.reason)
            | Error `Write_error e ->
              Logs.err (fun m -> m "%s: write error %a"
                           url
                           KV.pp_write_error e);
              failed (Fmt.str "write error: %a" KV.pp_write_error e)
            | Error `Swap e ->
              Logs.err (fun m -> m "%s: swap error %a"
                           url
                           Swap.pp_error e);
              failed (Fmt.str "swap error: %a" Swap.pp_error e)
            | Ok (digests, body) ->
              Disk.finalize_write disk quux ~url body csums digests
          end
        | Error me ->
          (* Log the failure like every other failure branch does, instead
             of only recording it for the status page. *)
          Logs.warn (fun m -> m "%s: mimic error %a" url Mimic.pp_error me);
          failed (Fmt.str "mimic error: %a" Mimic.pp_error me))
    (SM.bindings urls) >>= fun () ->
  Disk.update_caches disk >|= fun () ->
  Logs.info (fun m -> m "downloading of %d urls done" (SM.cardinal urls))
|
2022-09-04 08:01:45 +00:00
|
|
|
|
2022-09-26 20:42:00 +00:00
|
|
|
(* [dump_git git_dump git_kv] serialises [git_kv] with [Git_kv.to_octets]
   and writes the result to [git_dump]. The outcome is only logged: a write
   error does not fail the returned promise. *)
let dump_git git_dump git_kv =
  Git_kv.to_octets git_kv >>= fun octets ->
  let report = function
    | Error e ->
      Logs.warn (fun m -> m "failed to dump git: %a" Cache.pp_write_error e)
    | Ok () ->
      Logs.info (fun m -> m "dumped git %d bytes" (String.length octets))
  in
  Cache.write git_dump octets >|= report
|
|
|
|
|
2024-07-15 10:22:57 +00:00
|
|
|
(* [restore_git ~remote git_dump git_ctx] tries to rebuild the git store
   from the data read from [git_dump].

   Resolves to [Ok git_kv] on success, and to [Error ()] when [git_dump]
   holds no data, cannot be read, or its contents are rejected by
   [Git_kv.of_octets]; the latter two cases are logged. *)
let restore_git ~remote git_dump git_ctx =
  let unavailable = Error () in
  Cache.read git_dump >>= fun cached ->
  match cached with
  | Error e ->
    Logs.warn (fun m -> m "failed to read git state: %a" Cache.pp_error e);
    Lwt.return unavailable
  | Ok None -> Lwt.return unavailable
  | Ok (Some data) ->
    Git_kv.of_octets git_ctx ~remote data
    >|= Result.map_error (fun (`Msg msg) ->
        Logs.err (fun m -> m "error restoring git state: %s" msg))
|
|
|
|
|
2022-09-29 12:40:50 +00:00
|
|
|
(* HTTP server layer instantiated over the TCP implementation of [Stack];
   [start_mirror] below uses it through [Paf.init], [Paf.http_service] and
   [Paf.serve]. *)
module Paf = Paf_mirage.Make(Stack.TCP)
|
2022-08-26 13:18:02 +00:00
|
|
|
|
2024-11-01 13:35:08 +00:00
|
|
|
(* [start_mirror parts stack git_ctx http_ctx] runs the mirror on the given
   disk partitions.

   It connects the tar store, the caches and the swap partition, then
   initialises [Disk]. If [K.check ()] is set it stops there. Otherwise it:
   - obtains the git state, restored from [git_dump] unless
     [K.ignore_local_git ()] is set, and freshly fetched from [K.remote ()]
     (then dumped) when restoring is skipped or fails;
   - starts the HTTP service on [K.port ()];
   - starts a background loop that calls [update] once per hour;
   - downloads the archives once, and finally waits on the HTTP server. *)
let start_mirror { Part.tar; swap; git_dump; md5s; sha512s } stack git_ctx http_ctx =
  KV.connect tar >>= fun kv ->
  Cache.connect git_dump >>= fun git_dump ->
  Cache.connect md5s >>= fun md5s ->
  Cache.connect sha512s >>= fun sha512s ->
  Swap.connect swap >>= fun swap ->
  Logs.info (fun m -> m "Available bytes in tar storage: %Ld" (KV.free kv));
  Disk.init ~verify_sha256:(K.verify_sha256 ()) kv md5s sha512s swap >>= fun disk ->
  let remote = K.remote () in
  if K.check () then
    Lwt.return_unit
  else begin
    (* Fetch the repository from [remote] and persist it to [git_dump]. *)
    let fresh_clone () =
      Git_kv.connect git_ctx remote >>= fun git_kv ->
      dump_git git_dump git_kv >|= fun () ->
      git_kv
    in
    Logs.info (fun m -> m "Initializing git state. This may take a while...");
    let restored =
      if K.ignore_local_git () then
        Lwt.return (Error ())
      else
        restore_git ~remote git_dump git_ctx
    in
    restored >>= (function
        | Ok git_kv -> Lwt.return git_kv
        | Error () -> fresh_clone ())
    >>= fun git_kv ->
    Logs.info (fun m -> m "Done initializing git state!");
    Serve.commit_id git_kv >>= fun commit_id ->
    Logs.info (fun m -> m "git: %s" commit_id);
    Serve.create remote git_kv >>= fun serve ->
    Paf.init ~port:(K.port ()) (Stack.tcp stack) >>= fun t ->
    (* Refresh the git state; when [Serve.update_git] reports changes, dump
       the new state and download the archives again. *)
    let update () =
      Serve.update_git ~remote serve git_kv >>= function
      | None | Some [] -> Lwt.return_unit
      | Some _changes ->
        dump_git git_dump git_kv >>= fun () ->
        download_archives (K.parallel_downloads ()) disk http_ctx git_kv
    in
    let handler = Serve.dispatch serve disk (K.hook_url ()) update in
    let service =
      Paf.http_service ~error_handler:(fun _ ?request:_ _ _ -> ()) handler
    in
    let `Initialized th = Paf.serve service t in
    Logs.info (fun f -> f "listening on %d/HTTP" (K.port ()));
    (* Background loop: sleep one hour, update, repeat. *)
    let rec refresh_hourly () =
      Time.sleep_ns (Duration.of_hour 1) >>= update >>= refresh_hourly
    in
    Lwt.async refresh_hourly;
    download_archives (K.parallel_downloads ()) disk http_ctx git_kv >>= fun () ->
    th >|= fun _v -> ()
  end
|
2024-10-09 16:42:01 +00:00
|
|
|
|
|
|
|
(* [start block _time _pclock stack git_ctx http_ctx] is the unikernel entry
   point.

   When [K.initialize_disk ()] is set, [block] is formatted with the
   configured sector counts: on success a message is printed and the promise
   resolves; on failure the error is logged and the process exits (with
   [Mirage_runtime.argument_error] for a [`Msg] error, with 2 for a [`Block]
   error). Otherwise the partitions of [block] are connected and handed to
   [start_mirror]. *)
let start block _time _pclock stack git_ctx http_ctx =
  let initialize_disk = K.initialize_disk () in
  let sectors_cache = K.sectors_cache () in
  let sectors_git = K.sectors_git () in
  let sectors_swap = K.sectors_swap () in
  if not initialize_disk then
    (Part.connect block >>= fun parts ->
     start_mirror parts stack git_ctx http_ctx)
  else
    Part.format block ~sectors_cache ~sectors_git ~sectors_swap >>= function
    | Error (`Msg e) ->
      Logs.err (fun m -> m "Error formatting disk: %s" e);
      exit Mirage_runtime.argument_error
    | Error (`Block e) ->
      Logs.err (fun m -> m "Error formatting disk: %a" BLOCK.pp_write_error e);
      exit 2
    | Ok () ->
      Logs.app (fun m -> m "Successfully initialized the disk! You may restart now without --initialize-disk.");
      Lwt.return_unit
|
2022-08-25 12:57:03 +00:00
|
|
|
end
|