This commit is contained in:
Robur 2023-05-02 12:03:25 +00:00
parent ebeadf69d8
commit fd8ce3be03

View file

@ -18,9 +18,7 @@ module Make
module SM = Map.Make(String) module SM = Map.Make(String)
module SSet = Set.Make(String) module SSet = Set.Make(String)
module HM = Map.Make(struct let compare_hash h h' =
type t = Mirage_crypto.Hash.hash
let compare h h' =
match h, h' with match h, h' with
| `SHA512, `SHA512 -> 0 | `SHA512, `SHA512 -> 0
| `SHA512, _ -> 1 | `SHA512, _ -> 1
@ -38,6 +36,10 @@ module Make
| `SHA1, `MD5 -> 1 | `SHA1, `MD5 -> 1
| `MD5, `MD5 -> 0 | `MD5, `MD5 -> 0
| `MD5, _ -> -1 | `MD5, _ -> -1
module HM = Map.Make(struct
type t = Mirage_crypto.Hash.hash
let compare = compare_hash
end) end)
let hash_to_string = function let hash_to_string = function
@ -312,6 +314,35 @@ module Make
in in
read_more a Optint.Int63.zero read_more a Optint.Int63.zero
(*
module HM_running = struct
let empty h =
let module H = (val Mirage_crypto.Hash.module_of h) in
(* We need MD5, SHA256 and SHA512. [h] is likely one of the
aforementioned and in that case we don't compute the same hash twice
*)
HM.empty
|> HM.add `MD5 Mirage_crypto.Hash.MD5.empty
|> HM.add `SHA256 Mirage_crypto.Hash.SHA256.empty
|> HM.add `SHA512 Mirage_crypto.Hash.SHA512.empty
|> HM.add h H.empty
let feed t data =
HM.map (fun h v ->
let module H = (val Mirage_crypto.Hash.module_of h) in
H.feed v data)
t
let get =
HM.map (fun h v ->
let module H = (val Mirage_crypto.Hash.module_of h) in
H.get v)
end
*)
module Running_hash = struct module Running_hash = struct
type _ t = type _ t =
| MD5 : Mirage_crypto.Hash.MD5.t -> [> `MD5 ] t | MD5 : Mirage_crypto.Hash.MD5.t -> [> `MD5 ] t
@ -378,7 +409,7 @@ module Make
let init_write csums = let init_write csums =
let hash, csum = HM.max_binding csums in let hash, csum = HM.max_binding csums in
(hash, csum), Ok (empty_digests hash, Optint.Int63.zero, "") (hash, csum), Ok (empty_digests hash, `Init)
let content_length_of_string s = let content_length_of_string s =
match Int64.of_string s with match Int64.of_string s with
@ -423,29 +454,37 @@ module Make
in in
Mirage_kv.Key.(to_delete / hash_to_string hash / (encoded_csum ^ "." ^ rand)) Mirage_kv.Key.(to_delete / hash_to_string hash / (encoded_csum ^ "." ^ rand))
let write_partial t (hash, csum) : _ -> (_ * Optint.Int63.t * string, _) result -> string -> _ result Lwt.t = let write_partial t (hash, csum) =
(* XXX: we may be in trouble if different hash functions are used for the same archive *) (* XXX: we may be in trouble if different hash functions are used for the same archive *)
let key = pending_key (hash, csum) in let key = pending_key (hash, csum) in
let ( >>>= ) = Lwt_result.bind in let ( >>>= ) = Lwt_result.bind in
fun response r data -> fun response r data ->
Lwt.return r >>>= fun (digests, offset, body) -> Lwt.return r >>>= fun (digests, acc) ->
let len = String.length data in let digests = update_digests digests data in
match body_length response with match acc with
| `Init ->
begin match body_length response with
| `Bad_response -> Lwt.return (Error `Bad_response) | `Bad_response -> Lwt.return (Error `Bad_response)
| `Fixed size -> | `Fixed size ->
begin if Optint.Int63.equal offset Optint.Int63.zero then
KV.allocate t.dev key (Optint.Int63.of_int64 size) KV.allocate t.dev key (Optint.Int63.of_int64 size)
|> Lwt_result.map_error (fun e -> `Write_error e) |> Lwt_result.map_error (fun e -> `Write_error e)
else >>>= fun () ->
Lwt.return (Ok ()) KV.set_partial t.dev key ~offset:Optint.Int63.zero data
end >>>= fun () -> |> Lwt_result.map_error (fun e -> `Write_error e) >>>= fun () ->
let len = String.length data in
let offset = Optint.Int63.of_int len in
Lwt.return_ok (digests, `Fixed_body (size, offset))
| `Unknown ->
Lwt.return_ok (digests, `Unknown data)
end
| `Fixed_body (size, offset) ->
KV.set_partial t.dev key ~offset data KV.set_partial t.dev key ~offset data
|> Lwt_result.map_error (fun e -> `Write_error e) >>>= fun () -> |> Lwt_result.map_error (fun e -> `Write_error e) >>>= fun () ->
let digests = update_digests digests data in let len = String.length data in
Lwt.return_ok (digests, Optint.Int63.(add offset (of_int len)), body) let offset = Optint.Int63.(add offset (of_int len)) in
| `Unknown -> Lwt.return_ok (digests, `Fixed_body (size, offset))
let digests = update_digests digests data in | `Unknown body ->
Lwt.return_ok (digests, Optint.Int63.(add offset (of_int len)), body ^ data) Lwt.return_ok (digests, `Unknown (body ^ data))
let digests_to_hm digests = let digests_to_hm digests =
HM.empty HM.empty
@ -464,40 +503,57 @@ module Make
(fun (h, csum) -> String.equal csum (HM.find h csums)) (fun (h, csum) -> String.equal csum (HM.find h csums))
common_bindings common_bindings
let finalize_write t (hash, csum) csums digests = let finalize_write t (hash, csum) ~url body csums digests =
let source = pending_key (hash, csum) in let sizes_match, body_size_in_header =
if check_csums_digests csums digests then match body with
| `Fixed_body (reported, actual) -> Optint.Int63.(equal (of_int reported) actual), true
| `Unknown _ -> true, false
in
if check_csums_digests csums digests && sizes_match then
let sha256 = to_hex (Mirage_crypto.Hash.SHA256.get digests.sha256) let sha256 = to_hex (Mirage_crypto.Hash.SHA256.get digests.sha256)
and md5 = to_hex (Mirage_crypto.Hash.MD5.get digests.md5) and md5 = to_hex (Mirage_crypto.Hash.MD5.get digests.md5)
and sha512 = to_hex (Mirage_crypto.Hash.SHA512.get digests.sha512) in and sha512 = to_hex (Mirage_crypto.Hash.SHA512.get digests.sha512) in
let dest = Mirage_kv.Key.v sha256 in let dest = Mirage_kv.Key.v sha256 in
KV.rename t.dev ~source ~dest >|= function begin match body with
| `Unknown body ->
Logs.info (fun m -> m "downloaded %s, now writing" url);
KV.set t.dev dest body
| `Fixed_body (reported_size, actual_size) ->
Logs.info (fun m -> m "downloaded %s" url);
let source = pending_key (hash, csum) in
KV.rename t.dev ~source ~dest
end >|= function
| Ok () -> | Ok () ->
t.md5s <- SM.add md5 sha256 t.md5s; t.md5s <- SM.add md5 sha256 t.md5s;
t.sha512s <- SM.add sha512 sha256 t.sha512s; t.sha512s <- SM.add sha512 sha256 t.sha512s
Ok () | Error e ->
| Error e -> Error (`Write_error e) Logs.err (fun m -> m "Write failure for %s: %a" url KV.pp_write_error e)
else begin
if sizes_match then
Logs.err (fun m -> m "Bad checksum %s: computed %s expected %s" url
(hash_to_string hash) (hex_to_string csum))
else else
Logs.err (fun m -> m "Size mismatch %s: received %a bytes expected %a bytes" url
Optint.Int63.pp actual Optint.Int63.pp reported);
if body_size_in_header then
(* if the checksums mismatch we want to delete the file. We are only (* if the checksums mismatch we want to delete the file. We are only
able to do so if it was the latest created file, so we expect and able to do so if it was the latest created file, so we expect and
error. Ideally, we want to match for `Append_only or other errors *) error. Ideally, we want to match for `Append_only or other errors *)
KV.remove t.dev source >>= function KV.remove t.dev source >>= function
| Ok () -> | Ok () -> Lwt.return_unit
Logs.info (fun m -> m "Removed %a" Mirage_kv.Key.pp source);
Lwt_result.fail (`Bad_checksum (hash, csum))
| Error e -> | Error e ->
Logs.debug (fun m -> m "Failed to remove %a: %a"
Mirage_kv.Key.pp source KV.pp_write_error e);
(* we failed to delete the file so we mark it for deletion *) (* we failed to delete the file so we mark it for deletion *)
let dest = to_delete_key (hash, csum) in let dest = to_delete_key (hash, csum) in
Logs.warn (fun m -> m "Failed to remove %a: %a. Moving it to %a" Logs.warn (fun m -> m "Failed to remove %a: %a. Moving it to %a"
Mirage_kv.Key.pp source KV.pp_write_error e Mirage_kv.Key.pp dest); Mirage_kv.Key.pp source KV.pp_write_error e Mirage_kv.Key.pp dest);
KV.rename t.dev ~source ~dest >|= function KV.rename t.dev ~source ~dest >|= function
| Ok () -> Error (`Bad_checksum (hash, csum)) | Ok () -> ()
| Error e -> | Error e ->
Logs.warn (fun m -> m "Error renaming file %a -> %a: %a" Logs.warn (fun m -> m "Error renaming file %a -> %a: %a"
Mirage_kv.Key.pp source Mirage_kv.Key.pp dest KV.pp_write_error e); Mirage_kv.Key.pp source Mirage_kv.Key.pp dest KV.pp_write_error e)
Error (`Bad_checksum (hash, csum)) else
Lwt.return_unit
end
(* on disk, we use a flat file system where the filename is the sha256 of the data *) (* on disk, we use a flat file system where the filename is the sha256 of the data *)
@ -593,28 +649,6 @@ module Make
update_caches t >|= fun () -> update_caches t >|= fun () ->
t t
let write t ~url data hm digests =
let cs = Cstruct.of_string data in
let sha256 = Mirage_crypto.Hash.digest `SHA256 cs |> to_hex
and md5 = Mirage_crypto.Hash.digest `MD5 cs |> to_hex
and sha512 = Mirage_crypto.Hash.digest `SHA512 cs |> to_hex
in
if check_csums_digests hm digests
then begin
KV.set t.dev (Mirage_kv.Key.v sha256) data >|= function
| Ok () ->
t.md5s <- SM.add md5 sha256 t.md5s;
t.sha512s <- SM.add sha512 sha256 t.sha512s;
Logs.debug (fun m -> m "wrote %s (%d bytes)" sha256
(String.length data))
| Error e ->
Logs.err (fun m -> m "error %a while writing %s (key %s)"
KV.pp_write_error e url sha256)
end else begin
Logs.err (fun m -> m "Bad checksum for %s" url);
Lwt.return_unit
end
let exists t h v = let exists t h v =
match find_key t h v with match find_key t h v with
| Error _ -> Lwt.return false | Error _ -> Lwt.return false
@ -952,26 +986,17 @@ stamp: %S
let quux, body_init = Disk.init_write csums in let quux, body_init = Disk.init_write csums in
Http_mirage_client.request http_client url (Disk.write_partial disk quux) body_init >>= function Http_mirage_client.request http_client url (Disk.write_partial disk quux) body_init >>= function
| Ok (resp, r) -> | Ok (resp, r) ->
begin match Disk.body_length resp, r with begin match r with
| `Bad_response, _ | _, Error `Bad_response -> | Error `Bad_response ->
Logs.warn (fun m -> m "%s: %a (reason %s)" Logs.warn (fun m -> m "%s: %a (reason %s)"
url H2.Status.pp_hum resp.status resp.reason); url H2.Status.pp_hum resp.status resp.reason);
Lwt.return_unit Lwt.return_unit
| _, Error `Write_error e -> | Error `Write_error e ->
Logs.warn (fun m -> m "%s: write error %a" Logs.err (fun m -> m "%s: write error %a"
url KV.pp_write_error e); url KV.pp_write_error e);
Lwt.return_unit Lwt.return_unit
| `Unknown, Ok (digests, _, body) -> | Ok (digests, body) ->
Logs.info (fun m -> m "downloaded %s, now writing..." url); Disk.finalize_write disk quux ~url body csums digests
Disk.write disk ~url body csums digests
| `Fixed _size, Ok (digests, _, _) ->
Logs.info (fun m -> m "downloaded %s" url);
Disk.finalize_write disk quux csums digests >|= function
| Ok () -> ()
| Error (`Write_error e) ->
Logs.warn (fun m -> m "Error writing %s: %a" url KV.pp_write_error e)
| Error `Bad_checksum (hash, csum) ->
Logs.err (fun m -> m "%s hash mismatch, expected %s:%s" url (hash_to_string hash) (hex_to_string csum))
end end
| _ -> Lwt.return_unit) | _ -> Lwt.return_unit)
(SM.bindings urls) >>= fun () -> (SM.bindings urls) >>= fun () ->